diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 1d880c7..ef2040e 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -1,5 +1,10 @@ name: Unit tests +on: + pull_request: + branches: [ main ] + workflow_dispatch: + jobs: uv-example: name: python @@ -13,6 +18,9 @@ jobs: - name: Set up Python run: uv python install + + - name: Install the project + run: uv sync --all-extras --dev - name: Set up PostgreSQL run: | @@ -20,4 +28,8 @@ jobs: - name: Run pytest run: | - uv python -m pytest \ No newline at end of file + uv run pytest + + - name: Dump docker logs + if: failure() + uses: jwalton/gh-docker-logs@v2 \ No newline at end of file diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 0000000..fc03d5d --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,23 @@ +name: Ruff + +on: + pull_request: + branches: [ main ] + workflow_dispatch: + +jobs: + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: chartboost/ruff-action@v1 + with: + args: format --check + + ruff-lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: chartboost/ruff-action@v1 + with: + args: check \ No newline at end of file diff --git a/.gitignore b/.gitignore index a257c75..763987b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +###################### +# Project .gitignore # +###################### + + scratch/ *.ipynb_checkpoints @@ -9,16 +14,22 @@ scratch/ .tmp/ notebooks/tmp* + +########################## +# Boilerplate .gitignore # +########################## + + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] +*$py.class # C extensions *.so # Distribution / packaging .Python -env/ build/ develop-eggs/ dist/ @@ -30,9 +41,12 @@ lib64/ parts/ sdist/ var/ +wheels/ +share/python-wheels/ *.egg-info/ .installed.cfg *.egg +MANIFEST # PyInstaller # Usually these files are written by a python script from a template @@ -47,12 +61,17 @@ pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ +.nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ # Translations *.mo @@ -60,41 +79,106 @@ coverage.xml # Django stuff: *.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy # Sphinx documentation docs/_build/ # PyBuilder +.pybuilder/ target/ -# DotEnv configuration +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments .env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ -# Database -*.db -*.rdb - -# Pycharm -.idea +# Spyder project settings +.spyderproject +.spyproject -# VS Code -.vscode/ +# Rope project settings +.ropeproject -# Spyder -.spyproject/ +# mkdocs documentation +/site -# Jupyter NB Checkpoints -.ipynb_checkpoints/ +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json -# Mac OS-specific storage files -.DS_Store +# Pyre type checker +.pyre/ -# vim -*.swp -*.swo +# pytype static type analyzer +.pytype/ -# Mypy cache -.mypy_cache/ +# Cython debug symbols +cython_debug/ -# Theia -.theia +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..45f8baf --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,10 @@ +{ + "python.testing.pytestArgs": [ + "test" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "editor.formatOnSave": true, + "editor.defaultFormatter": "charliermarsh.ruff", + "ruff.runOnSave": true +} \ No newline at end of file diff --git a/Makefile b/Makefile deleted file mode 100644 index b9609f0..0000000 --- a/Makefile +++ /dev/null @@ -1,101 +0,0 @@ -.PHONY: cmf clean environment linux_requirements python_requirements requirements precommit test - -################################################################################# -# GLOBALS # -################################################################################# - -PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) -PROJECT_NAME = company_matching -SENSITIVE_PROJECT = no -PYTHON_VERSION = 3.9 -PYTHON_INTERPRETER = python - -NOW:=$(shell date +"%m-%d-%y_%H-%M-%S") - -################################################################################# -# COMMANDS # -################################################################################# - -## Make datasets table -cmf: - uv run $(PYTHON_INTERPRETER) cmf/admin.py - - -## Delete all compiled Python files -clean: - find . -type f -name "*.py[co]" -delete - find . -type d -name "__pycache__" -delete - - -## Reformat and lint -format: - uv run ruff format . - uv run ruff check . --fix - - -## Run Python tests -test: - docker compose up db -d --wait - uv run pytest - - -################################################################################# -# Self Documenting Commands # -################################################################################# - -.DEFAULT_GOAL := help - -# Inspired by -# sed script explained: -# /^##/: -# * save line in hold space -# * purge line -# * Loop: -# * append newline + line to hold space -# * go to next line -# * if line starts with doc comment, strip comment character off and loop -# * remove target prerequisites -# * append hold space (+ newline) to line -# * replace newline plus comments by `---` -# * print line -# Separate expressions are necessary because labels cannot be delimited by -# semicolon; see -.PHONY: help -help: - @echo "$$(tput bold)Available rules:$$(tput sgr0)" - @echo - @sed -n -e "/^## / { \ - h; \ - s/.*//; \ - :doc" \ - -e "H; \ - n; \ - s/^## //; \ - t doc" \ - -e "s/:.*//; \ - G; \ - s/\\n## /---/; \ - s/\\n/ /g; \ - p; \ - }" ${MAKEFILE_LIST} \ - | LC_ALL='C' sort --ignore-case \ - | awk -F '---' \ - -v ncol=$$(tput cols) \ - -v indent=19 \ - -v col_on="$$(tput setaf 6)" \ - -v col_off="$$(tput sgr0)" \ - '{ \ - printf "%s%*s%s ", col_on, -indent, $$1, col_off; \ - n = split($$2, words, " "); \ - line_length = ncol - indent; \ - for (i = 1; i <= n; i++) { \ - line_length -= length(words[i]) + 1; \ - if (line_length <= 0) { \ - line_length = ncol - indent - length(words[i]) - 1; \ - printf "\n%*s ", -indent, " "; \ - } \ - printf "%s ", words[i]; \ - } \ - printf "\n"; \ - }' \ - | more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars') diff --git a/README.md b/README.md index d35d1aa..e9aaebb 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@ Record matching is a chore. We aim to: This project is managed by [uv](https://docs.astral.sh/uv/), linted and formated with [ruff](https://docs.astral.sh/ruff/), and tested with [pytest](https://docs.pytest.org/en/stable/). -Task running is done with [make](https://www.gnu.org/software/make/). To see all available commands: +Task running is done with [just](https://just.systems/man/en/). To see all available commands: ```console -make +just -l ``` diff --git a/cmf/dedupers/__init__.py b/cmf/dedupers/__init__.py deleted file mode 100644 index 703163d..0000000 --- a/cmf/dedupers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from cmf.dedupers.naive import NaiveDeduper - -__all__ = ("NaiveDeduper",) diff --git a/cmf/helpers/__init__.py b/cmf/helpers/__init__.py deleted file mode 100644 index 1ad901a..0000000 --- a/cmf/helpers/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from cmf.helpers.cleaner import cleaner, cleaners -from cmf.helpers.comparison import comparison -from cmf.helpers.deletion import delete_model -from cmf.helpers.selector import selector, selectors -from cmf.helpers.visualisation import draw_model_tree - -__all__ = ( - # Cleaners - "cleaner", - "cleaners", - # Comparisons - "comparison", - # Selectors - "selector", - "selectors", - # Visualisation - "draw_model_tree", - # Deletion - "delete_model", -) diff --git a/cmf/linkers/__init__.py b/cmf/linkers/__init__.py deleted file mode 100644 index b56b819..0000000 --- a/cmf/linkers/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from cmf.linkers.deterministic import DeterministicLinker -from cmf.linkers.splinklinker import SplinkLinker -from cmf.linkers.weighteddeterministic import WeightedDeterministicLinker - -__all__ = ("DeterministicLinker", "WeightedDeterministicLinker", "SplinkLinker") diff --git a/justfile b/justfile new file mode 100644 index 0000000..561a003 --- /dev/null +++ b/justfile @@ -0,0 +1,18 @@ +# Make datasets table +matchbox: + uv run python src/matchbox/admin.py + +# Delete all compiled Python files +clean: + find . -type f -name "*.py[co]" -delete + find . -type d -name "__pycache__" -delete + +# Reformat and lint +format: + uv run ruff format . + uv run ruff check . --fix + +# Run Python tests +test: + docker compose up db -d --wait + uv run pytest diff --git a/notebooks/.gitkeep b/notebooks/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/notebooks/engineering/WL_CC.ipynb b/notebooks/engineering/WL_CC.ipynb deleted file mode 100644 index 0cf6904..0000000 --- a/notebooks/engineering/WL_CC.ipynb +++ /dev/null @@ -1,671 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "id": "123cf1dc-6310-4183-b12a-0879b927047b", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Looking in indexes: https://s3-eu-west-2.amazonaws.com/mirrors.notebook.uktrade.io/pypi/\n", - "Collecting dwutils@ git+ssh://****@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest\n", - " Cloning ssh://****@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git (to revision latest) to /tmp/pip-install-e41jcl0i/dwutils_f4b1526497354be2bfcac10880e133e4\n", - " Running command git clone --filter=blob:none --quiet 'ssh://****@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git' /tmp/pip-install-e41jcl0i/dwutils_f4b1526497354be2bfcac10880e133e4\n", - " Resolved ssh://****@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git to commit 20144945565fe9e71c91311da3401156e12095ed\n", - " Installing build dependencies ... \u001b[?25ldone\n", - "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", - "\u001b[?25h Installing backend dependencies ... \u001b[?25ldone\n", - "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", - "\u001b[?25hRequirement already satisfied: gitpython in /opt/conda/lib/python3.9/site-packages (from dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (3.1.42)\n", - "Requirement already satisfied: mlflow-skinny==2.10.* in /opt/conda/lib/python3.9/site-packages (from dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2.10.2)\n", - "Requirement already satisfied: scipy in /opt/conda/lib/python3.9/site-packages (from dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (1.12.0)\n", - "Requirement already satisfied: pandas in /opt/conda/lib/python3.9/site-packages (from dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2.1.0)\n", - "Requirement already satisfied: psycopg2-binary in /opt/conda/lib/python3.9/site-packages (from dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2.9.7)\n", - "Requirement already satisfied: pyarrow in /opt/conda/lib/python3.9/site-packages (from dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (15.0.1)\n", - "Requirement already satisfied: sqlalchemy in /opt/conda/lib/python3.9/site-packages (from dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2.0.20)\n", - "Requirement already satisfied: boto3 in /opt/conda/lib/python3.9/site-packages (from dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (1.34.58)\n", - "Requirement already satisfied: tomli in /opt/conda/lib/python3.9/site-packages (from dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2.0.1)\n", - "Requirement already satisfied: tqdm in /opt/conda/lib/python3.9/site-packages (from dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (4.62.3)\n", - "Requirement already satisfied: git-lfs-http-mirror in /opt/conda/lib/python3.9/site-packages (from dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (0.0.7)\n", - "Requirement already satisfied: nltk in /opt/conda/lib/python3.9/site-packages (from dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (3.8.1)\n", - "Requirement already satisfied: deprecation in /opt/conda/lib/python3.9/site-packages (from dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2.1.0)\n", - "Requirement already satisfied: click<9,>=7.0 in /opt/conda/lib/python3.9/site-packages (from mlflow-skinny==2.10.*->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (8.1.7)\n", - "Requirement already satisfied: cloudpickle<4 in /opt/conda/lib/python3.9/site-packages (from mlflow-skinny==2.10.*->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (3.0.0)\n", - "Requirement already satisfied: entrypoints<1 in /opt/conda/lib/python3.9/site-packages (from mlflow-skinny==2.10.*->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (0.4)\n", - "Requirement already satisfied: pyyaml<7,>=5.1 in /opt/conda/lib/python3.9/site-packages (from mlflow-skinny==2.10.*->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (6.0.1)\n", - "Requirement already satisfied: protobuf<5,>=3.12.0 in /opt/conda/lib/python3.9/site-packages (from mlflow-skinny==2.10.*->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (4.25.3)\n", - "Requirement already satisfied: pytz<2024 in /opt/conda/lib/python3.9/site-packages (from mlflow-skinny==2.10.*->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2023.3.post1)\n", - "Requirement already satisfied: requests<3,>=2.17.3 in /opt/conda/lib/python3.9/site-packages (from mlflow-skinny==2.10.*->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2.31.0)\n", - "Requirement already satisfied: packaging<24 in /opt/conda/lib/python3.9/site-packages (from mlflow-skinny==2.10.*->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (23.1)\n", - "Requirement already satisfied: importlib-metadata!=4.7.0,<8,>=3.7.0 in /opt/conda/lib/python3.9/site-packages (from mlflow-skinny==2.10.*->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (7.0.1)\n", - "Requirement already satisfied: sqlparse<1,>=0.4.0 in /opt/conda/lib/python3.9/site-packages (from mlflow-skinny==2.10.*->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (0.4.4)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /opt/conda/lib/python3.9/site-packages (from gitpython->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (4.0.11)\n", - "Requirement already satisfied: botocore<1.35.0,>=1.34.58 in /opt/conda/lib/python3.9/site-packages (from boto3->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (1.34.58)\n", - "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/conda/lib/python3.9/site-packages (from boto3->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (1.0.1)\n", - "Requirement already satisfied: s3transfer<0.11.0,>=0.10.0 in /opt/conda/lib/python3.9/site-packages (from boto3->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (0.10.0)\n", - "Requirement already satisfied: httpx>=0.23.1 in /opt/conda/lib/python3.9/site-packages (from git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (0.24.1)\n", - "Requirement already satisfied: hypercorn>=0.14.3 in /opt/conda/lib/python3.9/site-packages (from git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (0.14.4)\n", - "Requirement already satisfied: quart>=0.19.4 in /opt/conda/lib/python3.9/site-packages (from git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (0.19.4)\n", - "Requirement already satisfied: joblib in /opt/conda/lib/python3.9/site-packages (from nltk->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (1.3.2)\n", - "Requirement already satisfied: regex>=2021.8.3 in /opt/conda/lib/python3.9/site-packages (from nltk->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2023.12.25)\n", - "Requirement already satisfied: numpy>=1.22.4 in /opt/conda/lib/python3.9/site-packages (from pandas->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (1.25.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.9/site-packages (from pandas->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2.8.2)\n", - "Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.9/site-packages (from pandas->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2023.3)\n", - "Requirement already satisfied: typing-extensions>=4.2.0 in /opt/conda/lib/python3.9/site-packages (from sqlalchemy->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (4.7.1)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/lib/python3.9/site-packages (from sqlalchemy->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (3.0.3)\n", - "Requirement already satisfied: urllib3<1.27,>=1.25.4 in /opt/conda/lib/python3.9/site-packages (from botocore<1.35.0,>=1.34.58->boto3->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (1.26.18)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /opt/conda/lib/python3.9/site-packages (from gitdb<5,>=4.0.1->gitpython->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (5.0.1)\n", - "Requirement already satisfied: certifi in /opt/conda/lib/python3.9/site-packages (from httpx>=0.23.1->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2023.7.22)\n", - "Requirement already satisfied: httpcore<0.18.0,>=0.15.0 in /opt/conda/lib/python3.9/site-packages (from httpx>=0.23.1->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (0.17.3)\n", - "Requirement already satisfied: idna in /opt/conda/lib/python3.9/site-packages (from httpx>=0.23.1->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (3.4)\n", - "Requirement already satisfied: sniffio in /opt/conda/lib/python3.9/site-packages (from httpx>=0.23.1->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (1.3.0)\n", - "Requirement already satisfied: h11 in /opt/conda/lib/python3.9/site-packages (from hypercorn>=0.14.3->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (0.14.0)\n", - "Requirement already satisfied: h2>=3.1.0 in /opt/conda/lib/python3.9/site-packages (from hypercorn>=0.14.3->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (4.1.0)\n", - "Requirement already satisfied: priority in /opt/conda/lib/python3.9/site-packages (from hypercorn>=0.14.3->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2.0.0)\n", - "Requirement already satisfied: wsproto>=0.14.0 in /opt/conda/lib/python3.9/site-packages (from hypercorn>=0.14.3->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (1.2.0)\n", - "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.9/site-packages (from importlib-metadata!=4.7.0,<8,>=3.7.0->mlflow-skinny==2.10.*->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (3.17.0)\n", - "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.9/site-packages (from python-dateutil>=2.8.2->pandas->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (1.16.0)\n", - "Requirement already satisfied: aiofiles in /opt/conda/lib/python3.9/site-packages (from quart>=0.19.4->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (23.2.1)\n", - "Requirement already satisfied: blinker>=1.6 in /opt/conda/lib/python3.9/site-packages (from quart>=0.19.4->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (1.7.0)\n", - "Requirement already satisfied: flask>=3.0.0 in /opt/conda/lib/python3.9/site-packages (from quart>=0.19.4->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (3.0.2)\n", - "Requirement already satisfied: itsdangerous in /opt/conda/lib/python3.9/site-packages (from quart>=0.19.4->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2.1.2)\n", - "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.9/site-packages (from quart>=0.19.4->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (3.1.3)\n", - "Requirement already satisfied: markupsafe in /opt/conda/lib/python3.9/site-packages (from quart>=0.19.4->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (2.1.3)\n", - "Requirement already satisfied: werkzeug>=3.0.0 in /opt/conda/lib/python3.9/site-packages (from quart>=0.19.4->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (3.0.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.9/site-packages (from requests<3,>=2.17.3->mlflow-skinny==2.10.*->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (3.2.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /opt/conda/lib/python3.9/site-packages (from h2>=3.1.0->hypercorn>=0.14.3->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in /opt/conda/lib/python3.9/site-packages (from h2>=3.1.0->hypercorn>=0.14.3->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (4.0.0)\n", - "Requirement already satisfied: anyio<5.0,>=3.0 in /opt/conda/lib/python3.9/site-packages (from httpcore<0.18.0,>=0.15.0->httpx>=0.23.1->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (4.0.0)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /opt/conda/lib/python3.9/site-packages (from anyio<5.0,>=3.0->httpcore<0.18.0,>=0.15.0->httpx>=0.23.1->git-lfs-http-mirror->dwutils@ git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest) (1.2.0)\n" - ] - } - ], - "source": [ - "!pip install dwutils@git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "2c76d233-7a6a-4d82-abd0-3ba75343de58", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "markdown", - "id": "f3912d8f-6c0c-4767-bd6d-1af8339b9605", - "metadata": {}, - "source": [ - "# Massive connected components\n", - "\n", - "Connected components crashes on 90m probabilities. We need to be able to handle that and more." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "05f280b4-811c-48b9-8316-87c5414b41a8", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": "ignoring unparsable config /home/theia/company-matching/pyproject.toml\nignoring unparsable config /home/theia/company-matching/pyproject.toml\n" - } - ], - "source": [ - "import cmf\n", - "from cmf import clean\n", - "from cmf.clean import steps\n", - "from cmf.data.utils import sqa_profiled\n", - "from cmf.dedupers import NaiveDeduper\n", - "from cmf.helpers import cleaner, cleaners, selector\n", - "from cmf.data.results import ClusterResults, ProbabilityResults\n", - "\n", - "import logging\n", - "\n", - "from dwutils import s3\n", - "\n", - "import pandas as pd\n", - "from pandas import DataFrame\n", - "import pyarrow as pa\n", - "import rustworkx as rx\n", - "\n", - "from typing import Optional\n", - "\n", - "def create_cmf_pipelines_logger() -> logging.Logger:\n", - " pipeline_logger = logging.getLogger(\"cmf_pipelines\")\n", - " logic_logger = logging.getLogger(\"cmf_logic\")\n", - "\n", - " pipeline_logger.setLevel(logging.INFO)\n", - " logic_logger.setLevel(logging.INFO)\n", - "\n", - " handler = logging.StreamHandler()\n", - " formatter = logging.Formatter(\n", - " \"[%(asctime)s: %(levelname)s] %(name)s %(module)s: %(message)s\"\n", - " )\n", - " handler.setFormatter(formatter)\n", - "\n", - " pipeline_logger.addHandler(handler)\n", - " logic_logger.addHandler(handler)\n", - "\n", - " return pipeline_logger\n", - "\n", - "\n", - "logger = create_cmf_pipelines_logger()" - ] - }, - { - "cell_type": "markdown", - "id": "0d431277-3fee-46b7-bdbf-5944e9a750c4", - "metadata": {}, - "source": [ - "## Pipeline" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "09e50c7d-107d-43e2-88ec-c184f2d5a40f", - "metadata": {}, - "outputs": [], - "source": [ - "_NAME = \"naive_hmrc_exports_v1\"\n", - "_SOURCE = \"hmrc.trade__exporters\"\n", - "\n", - "\n", - "def _query(limit: Optional[int] = None) -> DataFrame:\n", - " \"\"\"Select data.\"\"\"\n", - "\n", - " exp_selector = selector(\n", - " table=_SOURCE,\n", - " fields=[\"company_name\", \"postcode\"],\n", - " )\n", - "\n", - " exp_raw = cmf.query(selector=exp_selector, return_type=\"pandas\", limit=limit)\n", - "\n", - " logger.info(\n", - " \"Data retrieved successfully with %s unique datapoints\",\n", - " exp_raw.data_sha1.nunique(),\n", - " )\n", - "\n", - " return exp_raw\n", - "\n", - "\n", - "def _process(raw: DataFrame) -> DataFrame:\n", - " \"\"\"Clean data.\"\"\"\n", - "\n", - " clean_exp = cleaners(\n", - " cleaner(clean.company_name, {\"column\": \"hmrc_trade__exporters_company_name\"}),\n", - " cleaner(clean.postcode, {\"column\": \"hmrc_trade__exporters_postcode\"}),\n", - " )\n", - "\n", - " exp_clean = cmf.process(raw, clean_exp)\n", - "\n", - " logger.info(\"Data cleaned successfully\")\n", - "\n", - " return exp_clean\n", - "\n", - "\n", - "def _deduplicate(clean: DataFrame) -> ProbabilityResults:\n", - " \"\"\"Deduplicate data.\"\"\"\n", - "\n", - " exp_naive_deduper = cmf.make_deduper(\n", - " dedupe_run_name=_NAME,\n", - " description=\"Basic cleaning of name and postcode.\",\n", - " deduper=NaiveDeduper,\n", - " deduper_settings={\n", - " \"id\": \"data_sha1\",\n", - " \"unique_fields\": [\n", - " \"hmrc_trade__exporters_company_name\",\n", - " \"hmrc_trade__exporters_postcode\",\n", - " ],\n", - " },\n", - " data=clean,\n", - " data_source=_SOURCE,\n", - " )\n", - "\n", - " exp_deduped = exp_naive_deduper()\n", - "\n", - " logger.info(\n", - " \"Data deduplicated successfully. %s probabilities generated\",\n", - " exp_deduped.dataframe.shape[0],\n", - " )\n", - "\n", - " return exp_deduped\n", - "\n", - "\n", - "def _cluster(deduped: ProbabilityResults, clean: DataFrame) -> ClusterResults:\n", - " \"\"\"Resolve probabilities to clusters.\"\"\"\n", - " exp_clusters = cmf.to_clusters(clean, results=deduped, key=\"data_sha1\", threshold=1)\n", - "\n", - " logger.info(\n", - " \"Clusters resolved successfully. %s clusters generated\",\n", - " exp_clusters.dataframe.parent.nunique(),\n", - " )\n", - "\n", - " return exp_clusters\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "9bea25be-9933-4310-89b5-486d4a8e820c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": "[2024-03-14 12:10:54,385: INFO] cmf_pipelines 2290665410: Data retrieved successfully with 300000 unique datapoints\n[2024-03-14 12:10:59,014: INFO] cmf_pipelines 2290665410: Data cleaned successfully\n[2024-03-14 12:11:00,349: INFO] cmf_pipelines 2290665410: Data deduplicated successfully. 564691 probabilities generated\n[2024-03-14 12:11:03,129: INFO] cmf_pipelines 2290665410: Clusters resolved successfully. 109808 clusters generated\n" - } - ], - "source": [ - "ew_raw = _query(limit=300_000)\n", - "ew_clean = _process(raw=ew_raw)\n", - "ew_deduped = _deduplicate(clean=ew_clean)\n", - "ew_clusters = _cluster(deduped=ew_deduped, clean=ew_clean)" - ] - }, - { - "cell_type": "markdown", - "id": "293d7549-7643-4bce-8aab-91954a2e67cf", - "metadata": {}, - "source": [ - "## Playing around" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# from dwutils import db\n", - "\n", - "# x = db.query(\"\"\"\n", - "# select 'drop table if exists \"_team_cmf\".\"' || tablename || '\" cascade;' as queries\n", - "# from pg_tables\n", - "# where schemaname = '_team_cmf';\n", - "# \"\"\")[\"queries\"].to_list()\n", - "\n", - "# # for query in x:\n", - "# # db.execute(query)\n", - "\n", - "# x" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "c5a194b2-0168-4f4a-849d-08e552b9d311", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
03793000
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 3793000" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from dwutils import db\n", - "\n", - "db.query(f\"select count(*) from {_SOURCE};\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "564691" - }, - "metadata": {}, - "execution_count": 6 - } - ], - "source": [ - "ew_deduped.dataframe.shape[0]" - ] - }, - { - "cell_type": "markdown", - "id": "34af98ab-44ea-40f9-8485-640f8b684a0f", - "metadata": {}, - "source": [ - "For 567,484 probabilities using the `WriteOnlyMapped` methodology.\n", - "\n", - "* 394 seconds at 500k batch size\n", - "* 585 seconds at 250k batch size\n", - " * `execute` and `_emit_insert_statements` are like 400s of that\n", - " * 390 on second run\n", - "* 370 seconds at 100k batch size\n", - "* 370 seconds at 50k batch \n", - "* 370ish seconds at 10k batch\n", - "\n", - "Concerned the first-run test absorbs a lot of the processing time." - ] - }, - { - "cell_type": "markdown", - "id": "95c1de66-526e-47f7-b084-7590640d92bc", - "metadata": {}, - "source": [ - "For 567,484 probabilities using the `pg-bulk-ingest` methodology.\n", - "\n", - "* 177 seconds at 500k batch size\n", - "* 226 seconds at 250k batch size\n", - " * `execute` is 189 of that time, but it's faster!!\n", - "* 187 seconds at 100k batch size\n", - "* 225 seconds at 50k batch\n", - " * Again, `execute` about 189. Suspect the differences between 180ish and 220ish are that I'm fiddling about and certain things aren't being computed twice\n", - "* 293 seconds at 10k batch\n", - "\n", - "Overall, larger batch sizes seem to be marginally more efficient." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "2f462c5a-8469-4403-9464-ee2275f9b8c4", - "metadata": { - "scrolled": true, - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": "[2024-03-14 12:11:13,113: INFO] cmf_logic results: [naive_hmrc_exports_v1, ProbabilityResults] Registering model\n[2024-03-14 12:11:13,142: INFO] cmf_logic results: [naive_hmrc_exports_v1, ProbabilityResults] Writing deduplication data with batch size 500000\nCPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs\nWall time: 5.72 µs\n[2024-03-14 12:11:40,381: INFO] cmf_logic results: [naive_hmrc_exports_v1, ProbabilityResults] Processed 564691 link probabilities\n[2024-03-14 12:11:49,442: INFO] cmf_logic results: [naive_hmrc_exports_v1, ProbabilityResults] Removed old deduplication probabilities\n[2024-03-14 12:11:49,443: INFO] cmf_logic results: [naive_hmrc_exports_v1, ProbabilityResults] Inserting 564691 deduplication objects\n[2024-03-14 12:19:36,803: INFO] cmf_logic results: [naive_hmrc_exports_v1, ProbabilityResults] Inserted all 564691 deduplication objects\n[2024-03-14 12:19:36,806: INFO] cmf_logic results: [naive_hmrc_exports_v1, ProbabilityResults] Complete!\n 62737701 function calls (60474888 primitive calls) in 503.692 seconds\n\n Ordered by: cumulative time\n\n ncalls tottime percall cumtime percall filename:lineno(function)\n 1 0.001 0.001 503.694 503.694 /home/theia/company-matching/cmf/data/results.py:138(to_cmf)\n 1 0.214 0.214 503.662 503.662 /home/theia/company-matching/cmf/data/results.py:292(_deduper_to_cmf)\n 2 0.047 0.023 467.347 233.673 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:44(ingest)\n 1372 0.015 0.000 466.534 0.340 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1794(_execute_context)\n 1222 0.002 0.000 466.421 0.382 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1377(execute)\n 1372 0.008 0.000 465.389 0.339 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1853(_exec_single_context)\n 1252 0.002 0.000 464.583 0.371 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:505(_execute_on_connection)\n 1252 0.012 0.000 464.581 0.371 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1593(_execute_clauseelement)\n 1372 0.001 0.000 462.023 0.337 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:923(do_execute)\n 1372 462.010 0.337 462.022 0.337 {method 'execute' of 'psycopg2.extensions.cursor' objects}\n 1 0.235 0.235 26.998 26.998 /home/theia/company-matching/cmf/data/results.py:236(_prep_to_cmf)\n 2 0.046 0.023 22.227 11.114 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:2651(all)\n 7/6 0.000 0.000 17.928 2.988 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2245(execute)\n 7/6 0.000 0.000 17.928 2.988 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2078(_execute_internal)\n 6 0.000 0.000 17.927 2.988 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:283(orm_execute_statement)\n 184 0.001 0.000 16.031 0.087 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:543(_allrows)\n 60 0.001 0.000 14.827 0.247 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:216(csv_copy)\n 60 0.000 0.000 14.815 0.247 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:59(copy_from_stdin2)\n 60 0.837 0.014 14.815 0.247 {method 'copy_expert' of 'psycopg2.extensions.cursor' objects}\n1134931/5548 1.394 0.000 14.251 0.003 {method 'join' of 'str' objects}\n 1975 0.003 0.000 13.978 0.007 /opt/conda/envs/company_matching/lib/python3.9/site-packages/to_file_like_obj.py:29(read)\n 1133219 1.089 0.000 13.758 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/to_file_like_obj.py:9(up_to_iter)\n 32 0.006 0.000 12.851 0.402 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1761(all)\n2261673/1132115 0.628 0.000 12.225 0.000 {built-in method builtins.next}\n 1129442 0.944 0.000 11.857 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:262()\n 32 0.000 0.000 11.431 0.357 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1675(_fetchall_impl)\n 3 0.051 0.017 11.426 3.809 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2266(_fetchall_impl)\n 10 0.071 0.007 11.376 1.138 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:207(chunks)\n 3 0.000 0.000 9.391 3.130 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:2822(_iter)\n 4517528 1.965 0.000 8.575 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:263()\n 1 0.000 0.000 8.512 8.512 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:1918(orm_execute_statement)\n 256/182 0.001 0.000 7.083 0.039 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/deprecations.py:249(warned)\n 196/122 0.001 0.000 7.082 0.058 :1(__new__)\n 196/122 0.000 0.000 7.081 0.058 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:424(__new__)\n 196/122 0.002 0.000 7.081 0.058 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:433(_new)\n 374/244 0.004 0.000 7.078 0.029 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:488(__init__)\n 88/32 0.001 0.000 7.058 0.221 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:881(_autoload)\n 88/32 0.002 0.000 7.055 0.220 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1468(reflect_table)\n 88 0.001 0.000 6.884 0.078 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1950(_get_reflection_info)\n 704 0.002 0.000 6.882 0.010 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1970(run)\n 2 0.000 0.000 6.608 3.304 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:294()\n 4 0.140 0.035 6.107 1.527 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:223()\n 380386 0.945 0.000 5.967 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:1050(_instance)\n 186 0.000 0.000 5.804 0.031 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:2131(_fetchall_impl)\n 186 0.000 0.000 5.804 0.031 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1129(fetchall)\n 186 2.420 0.013 5.797 0.031 {method 'fetchall' of 'psycopg2.extensions.cursor' objects}\n 88 0.001 0.000 5.594 0.064 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:876(get_multi_columns)\n 88 0.002 0.000 5.592 0.064 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3631(get_multi_columns)\n 356 0.003 0.000 5.413 0.015 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:78(cache)\n 5 0.000 0.000 5.199 1.040 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:537(_raw_all_rows)\n 380386 0.379 0.000 4.250 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:502(new_instance)\n 88/32 0.001 0.000 4.183 0.131 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1715(_reflect_fk)\n 1 0.451 0.451 3.764 3.764 /home/theia/company-matching/cmf/data/utils/sha1.py:98(columns_to_value_ordered_sha1)\n 380387 3.688 0.000 3.720 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:193(__init__)\n 1372 0.007 0.000 3.354 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1806(_setup_result_proxy)\n 380387 1.200 0.000 3.335 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/extras.py:669()\n 3388146 2.925 0.000 3.331 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/records.py:281(__getitem__)\n 187 0.001 0.000 3.326 0.018 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1876(_setup_dml_or_text_result)\n 89 0.137 0.002 3.322 0.037 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1362(all)\n 88 0.000 0.000 2.909 0.033 :1(_load_domains)\n 88 0.002 0.000 2.908 0.033 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4878(_load_domains)\n 564692 0.708 0.000 2.549 0.000 /home/theia/company-matching/cmf/data/utils/sha1.py:79(list_to_value_ordered_sha1)\n 6 0.057 0.009 2.493 0.415 /home/theia/company-matching/cmf/data/utils/db.py:202(batches)\n 2823455 1.443 0.000 2.331 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:257()\n 150 0.001 0.000 2.249 0.015 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2448(_run_ddl_visitor)\n 270/150 0.001 0.000 2.248 0.015 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:660(traverse_single)\n 88 0.000 0.000 2.163 0.025 :1(_load_enums)\n 88 0.001 0.000 2.162 0.025 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4805(_load_enums)\n 380447 1.845 0.000 2.125 0.000 /opt/conda/envs/company_matching/lib/python3.9/uuid.py:138(__init__)\n 6 0.276 0.046 2.122 0.354 /home/theia/company-matching/cmf/data/utils/db.py:187(batched)\n 120 0.000 0.000 2.031 0.017 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:312(_invoke_with)\n 120 0.000 0.000 2.031 0.017 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:177(_execute_on_connection)\n 120 0.001 0.000 2.031 0.017 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1498(_execute_ddl)\n 90 0.000 0.000 1.968 0.022 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5801(create_all)\n 90 0.001 0.000 1.966 0.022 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:890(visit_metadata)\n 1129384 1.266 0.000 1.846 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/records.py:508(__getitem__)\n 60 0.001 0.000 1.838 0.031 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:928(visit_table)\n 5 0.266 0.053 1.353 0.271 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:541()\n 1 0.051 0.051 1.294 1.294 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:751(orm_setup_cursor_result)\n 182 0.931 0.005 1.262 0.007 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:551()\n 1 0.365 0.365 1.244 1.244 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:2011(_do_post_synchronize_fetch)\n 1129386 0.596 0.000 1.208 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:165(batch_for_current_table_until_a_queue_full)\n 2 1.157 0.579 1.196 0.598 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:562()\n 1252 0.033 0.000 1.128 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1276(_init_compiled)\n6096340/6096332 0.793 0.000 1.126 0.000 {built-in method builtins.isinstance}\n 564692 0.446 0.000 1.046 0.000 /home/theia/company-matching/cmf/data/utils/sha1.py:89()\n 382272 0.722 0.000 0.979 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/array.py:369(process)\n 564691 0.298 0.000 0.948 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:259()\n 706 0.010 0.000 0.712 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2032(_process_parameters_for_postcompile)\n 564691 0.420 0.000 0.650 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:223(escape_string)\n 1 0.000 0.000 0.540 0.540 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:832(_interpret_returning_rows)\n 1 0.370 0.370 0.540 0.540 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:879()\n 1129384 0.507 0.000 0.507 0.000 {method 'digest' of '_hashlib.HASH' objects}\n 2823455 0.465 0.000 0.465 0.000 {method 'hex' of 'bytes' objects}\n 352 0.002 0.000 0.462 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3984(_reflect_constraint)\n 2 0.000 0.000 0.455 0.228 /home/theia/company-matching/cmf/data/utils/db.py:99(schema_table_to_table)\n 2 0.000 0.000 0.455 0.228 /home/theia/company-matching/cmf/data/utils/db.py:82(string_to_table)\n 1 0.000 0.000 0.442 0.442 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/util/_decorators.py:325(wrapper)\n 1 0.000 0.000 0.442 0.442 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:2051(to_dict)\n 1 0.000 0.000 0.442 0.442 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/methods/to_dict.py:78(to_dict)\n 1 0.432 0.432 0.441 0.441 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/methods/to_dict.py:225()\n 2823455 0.424 0.000 0.424 0.000 {method 'upper' of 'str' objects}\n 706 0.004 0.000 0.416 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3289(_literal_execute_expanding_parameter)\n 1509768 0.406 0.000 0.406 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:906(process)\n 3401255 0.387 0.000 0.387 0.000 {method 'replace' of 'str' objects}\n 380386 0.332 0.000 0.347 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:1308(_populate_full)\n 1129382 0.333 0.000 0.333 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/records.py:242(__getattribute__)\n 1129384 0.229 0.000 0.329 0.000 /home/theia/company-matching/cmf/data/utils/sha1.py:67(prep_for_hash)\n 2 0.033 0.016 0.326 0.163 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/base.py:2055(tolist)\n 88 0.001 0.000 0.320 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:964(get_multi_pk_constraint)\n 176 0.000 0.000 0.318 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4044()\n 1252 0.240 0.000 0.318 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1491()\n 2 0.000 0.000 0.313 0.157 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:2534(to_records)\n 8 0.000 0.000 0.296 0.037 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:317(apply)\n 5/3 0.000 0.000 0.295 0.098 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6460(astype)\n 4 0.000 0.000 0.294 0.073 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:421(astype)\n 5 0.000 0.000 0.294 0.059 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:723(astype)\n 5 0.000 0.000 0.293 0.059 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:191(astype_array_safe)\n 5 0.000 0.000 0.293 0.059 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:157(astype_array)\n 380386 0.293 0.000 0.293 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:635(__iter__)\n 60 0.000 0.000 0.282 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5829(drop_all)\n 60 0.001 0.000 0.281 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:1011(visit_metadata)\n 4437 0.186 0.000 0.279 0.000 {method 'update' of 'dict' objects}\n 1129384 0.271 0.000 0.271 0.000 {built-in method _hashlib.openssl_sha1}\n 385077 0.258 0.000 0.258 0.000 {built-in method __new__ of type object at 0x56304d777380}\n2648008/2647611 0.254 0.000 0.254 0.000 {built-in method builtins.len}\n 88 0.001 0.000 0.248 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1139(get_multi_indexes)\n 88 0.001 0.000 0.247 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1051(get_multi_foreign_keys)\n 88 0.002 0.000 0.246 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4405(get_multi_indexes)\n 88 0.002 0.000 0.245 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4156(get_multi_foreign_keys)\n 381662 0.195 0.000 0.241 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3077(_apply_item_processor)\n 1131257 0.231 0.000 0.231 0.000 {built-in method builtins.min}\n 2 0.020 0.010 0.228 0.114 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/records.py:588(fromarrays)\n 381794 0.096 0.000 0.226 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3362()\n 565853 0.211 0.000 0.211 0.000 {built-in method builtins.sorted}\n 2 0.000 0.000 0.208 0.104 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/records.py:418(__new__)\n 1511921 0.206 0.000 0.206 0.000 {method 'add' of 'set' objects}\n 41174 0.074 0.000 0.201 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/ipkernel.py:770(_clean_thread_parent_frames)\n 150 0.000 0.000 0.199 0.001 :1(has_table)\n 60 0.001 0.000 0.199 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:1128(visit_table)\n 150 0.001 0.000 0.199 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3295(has_table)\n 150 0.000 0.000 0.197 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1287(scalar)\n 150 0.001 0.000 0.197 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:520(_execute_on_scalar)\n 1129386 0.186 0.000 0.186 0.000 /home/theia/company-matching/cmf/data/utils/db.py:204()\n 2 0.133 0.067 0.183 0.091 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:560()\n 381809 0.164 0.000 0.179 0.000 {method 'issuperset' of 'frozenset' objects}\n 2 0.000 0.000 0.174 0.087 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/base.py:656(astype)\n 9 0.146 0.016 0.174 0.019 {built-in method numpy.array}\n 88 0.001 0.000 0.170 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1410(get_multi_check_constraints)\n 88 0.001 0.000 0.169 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4697(get_multi_check_constraints)\n 1694073 0.163 0.000 0.163 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:879()\n 380387 0.152 0.000 0.152 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:507(_cleanup)\n 88 0.001 0.000 0.151 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1319(get_multi_table_comment)\n 88 0.001 0.000 0.149 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4622(get_multi_table_comment)\n 88 0.001 0.000 0.146 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1227(get_multi_unique_constraints)\n 88 0.001 0.000 0.145 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4545(get_multi_unique_constraints)\n 30 0.000 0.000 0.143 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:354(get_schema_names)\n 264 0.001 0.000 0.143 0.001 :1(_get_table_oids)\n 30 0.000 0.000 0.142 0.005 :1(get_schema_names)\n 264 0.001 0.000 0.142 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:105(go)\n 30 0.001 0.000 0.142 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3366(get_schema_names)\n 1 0.133 0.133 0.140 0.140 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:2024()\n 706 0.132 0.000 0.137 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3358()\n 88 0.000 0.000 0.136 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3877(_get_table_oids)\n 381088 0.130 0.000 0.130 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3317(_render_bindtemplate)\n 380386 0.129 0.000 0.129 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:207(_add_unpresent)\n 7 0.000 0.000 0.128 0.018 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1750(__exit__)\n 7 0.000 0.000 0.128 0.018 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2423(close)\n 7 0.002 0.000 0.128 0.018 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2525(_close_impl)\n 90 0.000 0.000 0.123 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:897()\n 90 0.000 0.000 0.123 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:857(_can_create_table)\n 380387 0.120 0.000 0.120 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:219(get)\n 7 0.012 0.002 0.119 0.017 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2533(expunge_all)\n 3 0.000 0.000 0.119 0.040 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:56(_astype_nansafe)\n 3 0.009 0.003 0.119 0.040 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:303(_from_sequence)\n 7 0.000 0.000 0.111 0.016 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:659(__array__)\n 7 0.111 0.016 0.111 0.016 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:1356(to_numpy)\n 564692 0.110 0.000 0.110 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:136(__contains__)\n 3 0.098 0.033 0.110 0.037 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:447(_box_pa_array)\n 348 0.001 0.000 0.106 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4182(__init__)\n 729 0.003 0.000 0.106 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:314(expect)\n 380387 0.104 0.000 0.104 0.000 :1(set)\n 250 0.001 0.000 0.104 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4308(__init__)\n 88 0.000 0.000 0.103 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1877(_reflect_unique_constraints)\n 9 0.101 0.011 0.101 0.011 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:459(_detach_states)\n 324 0.000 0.000 0.100 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4211()\n 146 0.001 0.000 0.099 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:524(_post_coercion)\n 146 0.000 0.000 0.098 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:492(_deep_deannotate)\n 146 0.000 0.000 0.097 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:499(clone)\n 1292 0.000 0.000 0.097 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:152(_deannotate)\n 133 0.097 0.001 0.097 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4843(_clone)\n 30 0.000 0.000 0.095 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1334(scalars)\n 381442 0.093 0.000 0.093 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2152()\n 2 0.000 0.000 0.089 0.045 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:2335(unique)\n 564692 0.089 0.000 0.089 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3465(identity_key_from_primary_key)\n 2 0.000 0.000 0.089 0.045 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:1019(unique)\n 2 0.000 0.000 0.089 0.045 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:1445(unique)\n 2 0.089 0.044 0.089 0.045 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/compute.py:238(wrapper)\n 2 0.000 0.000 0.085 0.042 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:2636()\n 90/84 0.000 0.000 0.084 0.001 {built-in method numpy.asarray}\n 6 0.000 0.000 0.083 0.014 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:974(__array__)\n 1 0.000 0.000 0.081 0.081 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6634()\n 380386 0.078 0.000 0.078 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:891(process)\n 564692 0.078 0.000 0.078 0.000 {method 'update' of '_hashlib.HASH' objects}\n 60 0.000 0.000 0.077 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:1018()\n 60 0.000 0.000 0.077 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:1089(_can_drop_table)\n 8 0.000 0.000 0.071 0.009 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2615(commit)\n 8 0.000 0.000 0.071 0.009 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2732(_do_commit)\n 8 0.000 0.000 0.071 0.009 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2707(_connection_commit_impl)\n 8 0.000 0.000 0.071 0.009 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1134(_commit_impl)\n 8 0.000 0.000 0.071 0.009 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:694(do_commit)\n 8 0.070 0.009 0.070 0.009 {method 'commit' of 'psycopg2.extensions.connection' objects}\n 607580 0.070 0.000 0.070 0.000 {method 'values' of 'dict' objects}\n 581973 0.066 0.000 0.066 0.000 {method 'append' of 'list' objects}\n 20587 0.042 0.000 0.065 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/ipkernel.py:785()\n 1252 0.005 0.000 0.050 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:667(_compile_w_cache)\n 20587 0.039 0.000 0.049 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1388(enumerate)\n 6 0.000 0.000 0.046 0.008 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:981(commit)\n1174/1100 0.005 0.000 0.044 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1320(_set_parent_with_dispatch)\n 380447 0.044 0.000 0.044 0.000 {method 'count' of 'list' objects}\n 380473 0.042 0.000 0.042 0.000 {method 'strip' of 'str' objects}\n 380536 0.033 0.000 0.033 0.000 {built-in method builtins.id}\n 25/11 0.000 0.000 0.033 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state_changes.py:95(_go)\n 1058 0.027 0.000 0.027 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2166()\n 324 0.002 0.000 0.027 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:822(__init__)\n 2 0.000 0.000 0.026 0.013 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1936(commit)\n 3/2 0.000 0.000 0.026 0.013 :1(commit)\n 3/2 0.000 0.000 0.026 0.013 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1250(commit)\n 501/324 0.001 0.000 0.025 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:909(process)\n 764/324 0.002 0.000 0.024 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:129(_compiler_dispatch)\n 270 0.003 0.000 0.024 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1619(_reflect_column)\n 1372 0.006 0.000 0.023 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1419(__init__)\n 144123 0.023 0.000 0.023 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1109(ident)\n 62040 0.021 0.000 0.022 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:485(make_row)\n 1 0.000 0.000 0.022 0.022 /home/theia/company-matching/cmf/data/results.py:87(_model_to_cmf)\n 6/4 0.000 0.000 0.021 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4259(__setitem__)\n 5 0.000 0.000 0.021 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4502(_set_item)\n 21059 0.019 0.000 0.021 0.000 {method 'get' of 'dict' objects}\n 4 0.003 0.001 0.020 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:389(__init__)\n 1252 0.002 0.000 0.020 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:526(get)\n 1252 0.014 0.000 0.019 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1852(construct_params)\n 1 0.000 0.000 0.019 0.019 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3827(merge)\n 1 0.000 0.000 0.019 0.019 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3914(_merge)\n 22 0.004 0.000 0.019 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:517(sanitize_array)\n 1 0.000 0.000 0.019 0.019 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3523(get)\n 1 0.000 0.000 0.019 0.019 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3700(_get_impl)\n 1 0.000 0.000 0.019 0.019 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:526(load_on_pk_identity)\n 1 0.000 0.000 0.019 0.019 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:2700(first)\n 860 0.001 0.000 0.019 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1379()\n 2646 0.005 0.000 0.018 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:526(iterrows)\n 12 0.018 0.002 0.018 0.002 {method 'copy' of 'numpy.ndarray' objects}\n 10 0.000 0.000 0.018 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4050(__getitem__)\n 566 0.008 0.000 0.017 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1505(__init__)\n 180 0.000 0.000 0.017 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:243(compile)\n 204 0.001 0.000 0.016 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:309(_compiler)\n 204 0.002 0.000 0.016 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1335(__init__)\n 4 0.000 0.000 0.015 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:4139(_take_with_is_copy)\n 4 0.000 0.000 0.015 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:4024(take)\n 4 0.000 0.000 0.015 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:869(take)\n 5 0.000 0.000 0.015 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:623(reindex_indexer)\n 5 0.000 0.000 0.015 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:708(_slice_take_blocks_ax0)\n 5 0.000 0.000 0.014 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4469(_set_item_mgr)\n 13 0.000 0.000 0.014 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/array_algos/take.py:59(take_nd)\n 13 0.011 0.001 0.014 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/array_algos/take.py:120(_take_nd_ndarray)\n 3 0.000 0.000 0.014 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:1287(take_nd)\n 1 0.000 0.000 0.014 0.014 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4308(_setitem_array)\n 120 0.000 0.000 0.014 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:65(_compiler)\n 3 0.013 0.004 0.013 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4458(_iset_item_mgr)\n 1128 0.003 0.000 0.013 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1482(_init_metadata)\n 8 0.000 0.000 0.013 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:124(maybe_convert_platform)\n 2040 0.011 0.000 0.013 0.000 {method 'sub' of 're.Pattern' objects}\n 8 0.011 0.001 0.013 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1577(construct_1d_object_array_from_listlike)\n 1372 0.003 0.000 0.012 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1719(create_cursor)\n 60 0.001 0.000 0.012 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:6574(visit_create_table)\n 804 0.001 0.000 0.011 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:203(sub)\n 534 0.003 0.000 0.011 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2291(_set_parent)\n 1258 0.003 0.000 0.011 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1529(_soft_close)\n 806/246 0.003 0.000 0.011 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/sql.py:119(as_string)\n 270 0.001 0.000 0.010 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1208(append_column)\n 88 0.004 0.000 0.010 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3664(_get_columns_info)\n 1372 0.002 0.000 0.009 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1740(create_default_cursor)\n 1250/979 0.006 0.000 0.009 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1249(__get__)\n 266 0.002 0.000 0.009 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4923(_set_parent)\n 506 0.004 0.000 0.009 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/sql.py:194(format)\n 82356 0.009 0.000 0.009 0.000 {method 'keys' of 'dict' objects}\n 2646 0.002 0.000 0.009 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:2119(_fetchiter_impl)\n 231 0.001 0.000 0.009 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1264(oneshot)\n 744 0.001 0.000 0.009 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:219(_init_items)\n 146 0.000 0.000 0.008 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1249(append_constraint)\n 7 0.000 0.000 0.008 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4657(visit_select)\n 459 0.003 0.000 0.008 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:285(_adapt_to_context)\n 88 0.000 0.000 0.008 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1691(_reflect_pk)\n 5 0.000 0.000 0.008 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2577(close)\n 5 0.000 0.000 0.008 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2726(_do_close)\n 1252 0.001 0.000 0.008 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2210(_safe_close_cursor)\n 5 0.000 0.000 0.008 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2710(_close_impl)\n 5 0.000 0.000 0.008 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2704(_connection_rollback_impl)\n 5 0.000 0.000 0.008 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1116(_rollback_impl)\n 13 0.000 0.000 0.008 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:691(do_rollback)\n 13 0.008 0.001 0.008 0.001 {method 'rollback' of 'psycopg2.extensions.connection' objects}\n 1514 0.004 0.000 0.008 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1298(__getattr__)\n 228 0.000 0.000 0.008 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:411(_generate_cache_key)\n 2796 0.002 0.000 0.007 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1097(fetchone)\n 979 0.001 0.000 0.007 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1436(info)\n1486/1338 0.002 0.000 0.007 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1137(__get__)\n 1372 0.002 0.000 0.007 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1483(cursor)\n 88 0.000 0.000 0.007 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4963(_reload)\n 228 0.001 0.000 0.007 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:347(_generate_cache_key)\n 7911 0.004 0.000 0.007 0.000 /opt/conda/envs/company_matching/lib/python3.9/encodings/utf_8.py:15(decode)\n 1 0.000 0.000 0.007 0.007 /home/theia/company-matching/cmf/data/utils/sha1.py:17(table_name_to_uuid)\n 1252 0.007 0.000 0.007 0.000 {method 'close' of 'psycopg2.extensions.cursor' objects}\n 1327 0.002 0.000 0.007 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:437(__get__)\n 240 0.001 0.000 0.007 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/sql.py:258(join)\n 6 0.000 0.000 0.007 0.001 :1(close)\n1335/1247 0.002 0.000 0.006 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:123(__exit__)\n 6 0.000 0.000 0.006 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1346(close)\n 30 0.001 0.000 0.006 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:89(create_first_batch_ingest_table_if_necessary)\n 5 0.000 0.000 0.006 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:5229(_sanitize_column)\n 7 0.000 0.000 0.006 0.001 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1565(_log)\n 236 0.000 0.000 0.006 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:363()\n 10/8 0.000 0.000 0.006 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:660(create_for_statement)\n 74 0.000 0.000 0.006 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4777(_set_parent)\n 1432 0.006 0.000 0.006 0.000 {method 'cursor' of 'psycopg2.extensions.connection' objects}\n 591/228 0.003 0.000 0.006 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:221(_gen_cache_key)\n 176 0.001 0.000 0.006 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:6626(visit_create_column)\n 9 0.006 0.001 0.006 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:259(all_states)\n 7 0.000 0.000 0.006 0.001 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1591(handle)\n 7 0.000 0.000 0.006 0.001 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1645(callHandlers)\n 7 0.000 0.000 0.006 0.001 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:939(handle)\n 7 0.000 0.000 0.006 0.001 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1071(emit)\n 5 0.000 0.000 0.005 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1000(create_for_statement)\n 792 0.001 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:827(_iter_impl)\n 98 0.000 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2427(_on_table_attach)\n 74 0.000 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4556(__init__)\n 1380 0.001 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/sql.py:349(as_string)\n 634 0.002 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2020(replace)\n 4 0.000 0.000 0.005 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:80(save_comment)\n 1327 0.001 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:183(_for_instance)\n 51 0.005 0.000 0.005 0.000 {built-in method numpy.empty}\n1334/1246 0.001 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:114(__enter__)\n 176 0.001 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:2105(get_column_specification)\n 2048 0.003 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:289(_compile)\n 87 0.000 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1505(operate)\n 470 0.001 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4358(_set_parent)\n 7 0.000 0.000 0.005 0.001 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1060(flush)\n 30 0.005 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:575()\n 3788 0.002 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:374(__call__)\n 7 0.000 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/iostream.py:592(flush)\n 264 0.002 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:574(_ad_hoc_cache_key_from_args)\n 7 0.000 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4486(__init__)\n 4 0.000 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:12662(_reindex_for_setitem)\n 1372 0.002 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:857(dialect_impl)\n 20614 0.004 0.000 0.004 0.000 {method '__exit__' of '_thread.RLock' objects}\n 534 0.001 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2201(_set_type)\n 87 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:186(operate)\n 1066 0.004 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1895()\n 7 0.000 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/threading.py:563(wait)\n 200 0.004 0.000 0.004 0.000 {method 'acquire' of '_thread.lock' objects}\n 7 0.000 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/threading.py:280(wait)\n 60 0.001 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:75(bind_identifiers)\n 156 0.001 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4219(_check_attach)\n 208/180 0.000 0.000 0.004 0.000 {built-in method builtins.repr}\n 1327 0.001 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:180(_for_class)\n 274 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_elements_constructors.py:1565(text)\n 8 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:2214(_generate_columns_plus_names)\n 1334 0.001 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:261(helper)\n 88 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1811(_reflect_indexes)\n 86 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/default_comparator.py:51(_boolean_compare)\n 188/180 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1098(__repr__)\n 740 0.003 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1958(_append_new_column)\n4540/4377 0.003 0.000 0.003 0.000 {built-in method builtins.hasattr}\n 489 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:177(_make_new_metadata)\n 288 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2286(__init__)\n 176 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1995(extend)\n 60 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:317(__str__)\n 2 0.000 0.000 0.003 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:5623(rename)\n 443 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1834(_unwrapped_dialect_impl)\n 2 0.000 0.000 0.003 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:1069(_rename)\n 264 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2076(__iter__)\n 188/180 0.002 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:746(generic_repr)\n 518 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4297(_set_parent)\n 74 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3209(_set_parent)\n 746 0.002 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/sql.py:104(__init__)\n 7911 0.003 0.000 0.003 0.000 {built-in method _codecs.utf_8_decode}\n 151 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:1262(sort_tables_and_constraints)\n 24 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5163(__init__)\n 176 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1967(_populate_separate_keys)\n 1092 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:587(_validate_dialect_kwargs)\n 88 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:684(get_multi_table_options)\n 2760 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/sql.py:350()\n 3100 0.002 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:159(__getattr__)\n 74 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3235(_set_table)\n 1705 0.002 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1576(__iter__)\n 236 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:405()\n 1424 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:205(_effective_processors)\n 118 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:284()\n 494 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/naming.py:191(_constraint_name)\n 1 0.000 0.000 0.003 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:5161(assign)\n 176 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:330(_inspection_context)\n 2372 0.002 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/sql.py:181(__init__)\n 197 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2668(visit_textclause)\n 147/125 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:335()\n 3 0.000 0.000 0.003 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6659(copy)\n 42 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1724(operate)\n 528 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1250(__iter__)\n 178 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4903(__init__)\n 792 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:502(_iterator_getter)\n 1327 0.002 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:137(__init__)\n 2796 0.003 0.000 0.003 0.000 {method 'fetchone' of 'psycopg2.extensions.cursor' objects}\n 2826 0.003 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:415(__getitem__)\n 4 0.000 0.000 0.003 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:557(copy)\n 1334 0.002 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:86(__init__)\n 1820 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:309(_operation_context)\n 608 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2416(_setup_on_memoized_fks)\n 3915 0.002 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:774(__hash__)\n 74 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4676()\n 118 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:130()\n 538 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7496(quote)\n 26 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:790(copy)\n 178 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1002(_extra_kwargs)\n 120 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7652(format_table)\n 24 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:1009(_set_parent)\n 1380 0.001 0.000 0.002 0.000 {built-in method psycopg2._psycopg.quote_ident}\n 542 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:198(search)\n 177 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:343(__missing__)\n 60/30 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:963(not_like)\n 10 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1221(close)\n 1372 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py:567(post_exec)\n6636/6635 0.002 0.000 0.002 0.000 {built-in method builtins.getattr}\n 564 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:182(_make_key_to_index)\n 178 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:2958(__init__)\n 196 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:849(__call__)\n 300 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:817(with_ddl_events)\n 30 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2037(not_like_op)\n 60 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/to_file_like_obj.py:4(to_file_like_obj)\n 24 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/api.py:41(listen)\n 1052 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5140(__new__)\n 4 0.000 0.000 0.002 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:6186(_get_indexer_strict)\n 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1516(construct_1d_arraylike_from_scalar)\n 69 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:347(__init__)\n 60 0.000 0.000 0.002 0.000 {built-in method builtins.__build_class__}\n 2078 0.002 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1602(executemany)\n 88 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1518()\n 119 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:273(_generative)\n 1128 0.002 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1223(_set_memoized_attribute)\n 95 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1948(__init__)\n 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4080(_foreing_key_query)\n 151 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1530(scalar)\n 94/44 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:575(__eq__)\n 128 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/sql.py:378(as_string)\n 17 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:475(__new__)\n 120 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1231(_init_ddl)\n 12 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3820(get_indexer)\n 153 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:741(_only_one_row)\n 60 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:2391(post_create_table)\n 180 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:850(c)\n 53/47 0.000 0.000 0.002 0.000 {built-in method _operator.eq}\n 74 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3055(_resolve_col_tokens)\n 81 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7442(_requires_quotes)\n 60 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/abc.py:105(__new__)\n 494 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/naming.py:152(_constraint_name_for_table)\n 88 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1038(_default_multi_reflect)\n 8 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1176(__getitem__)\n 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3502(_columns_query)\n 144 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:387()\n 124 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/inspection.py:113(inspect)\n 7 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:6162(get_indexer_for)\n 8/7 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2767(_generate_delimited_and_list)\n 180/176 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:949(process)\n 172 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:552(_kw_reg_for_dialect_cls)\n 118 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:133()\n 1116 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/sql.py:321(__init__)\n 176 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4630()\n 1058 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2172(process_expanding)\n 24 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:280(listen)\n 566 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4814(__init__)\n 144 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:550(__setitem__)\n 7 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4926(_compose_select_body)\n 60 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:52(temp_relation_name)\n 256 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/_collections_abc.py:767(__contains__)\n 12/11 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3368(visit_binary)\n 938/898 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1126(__get__)\n 8 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1689(_getitem_tuple)\n 43 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:969(_dialect_info)\n 45 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:790(_literal_coercion)\n 234 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:377(__getitem__)\n 60 0.000 0.000 0.001 0.000 :1(__init__)\n 564 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:185()\n 275 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/topological.py:58(sort)\n 15 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:3971(_ixs)\n 45 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4517(_bind_param)\n 528 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1519(__init__)\n 566 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4275(_col_expressions)\n 69 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:496(_merge_cursor_description)\n 804 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4363(__contains__)\n 338 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1927(add)\n 188 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/compat.py:65(inspect_getfullargspec)\n 240 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:832(with_ddl_events)\n 88 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1925(_reflect_check_constraints)\n 1216 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1169(key)\n 94 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5350(safe_construct)\n 86 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2830(_construct_for_op)\n 11/10 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3459(_generate_generic_binary)\n 248 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:564(dialect_options)\n 11/6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:343(_compiler_dispatch)\n 324 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4063(__init__)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3276(_pg_class_relkind_condition)\n 4362 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/sql.py:49(__init__)\n 1372 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py:570(_log_notices)\n 1101 0.001 0.000 0.001 0.000 {method 'search' of 're.Pattern' objects}\n 128 0.001 0.000 0.001 0.000 {built-in method psycopg2._psycopg.adapt}\n 1348 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1621(__contains__)\n 206 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:228(_construct)\n 17 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4432(_label_select_column)\n 122 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5348(__init__)\n 60 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/uuid.py:713(uuid4)\n 224 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/_collections_abc.py:825(__iter__)\n 3/2 0.000 0.000 0.001 0.001 :1(_prepare_impl)\n 3/2 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1215(_prepare_impl)\n 24 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:177(_listen)\n 300 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:442(_row_getter)\n 31 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:7593(ensure_index)\n 7 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4266(flush)\n 60 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:6641(create_table_constraints)\n 8 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1032(_getitem_lowerdim)\n 1086 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/types.py:171(__get__)\n 24 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:362(_listen)\n 277 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:870(_unwrapped_dialect_impl)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4315(_flush)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:5707(filter)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:6258(visit_delete)\n 275 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/topological.py:30(sort_as_subsets)\n 144 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:561(_manage_size)\n 7 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4768()\n 24 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:333(base_listen)\n 1 0.001 0.001 0.001 0.001 {method 'fill' of 'numpy.ndarray' objects}\n 494 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/naming.py:142(_get_convention)\n 74 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2769(__init__)\n 42 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_selectable_constructors.py:441(select)\n 86 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3850(__init__)\n 116 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:305(_connection_insp)\n 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1347(insert)\n 5 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:553(orm_setup_cursor_result)\n 50 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:437(expect_col_expression_collection)\n 236 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:261()\n 459 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:330()\n 5478 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:1375(cast)\n 32 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1022(adapt)\n 306 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1240(driver_connection)\n 456 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:250(compile)\n 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1633(_populate_column_collection)\n 51 0.000 0.000 0.001 0.000 :1(where)\n 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:801(_generate_fromclause_column_proxies)\n 62 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:625()\n 178 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5515(_add_table)\n 42 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5124(__init__)\n 8 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1719(_getitem_axis)\n 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1687(_populate_separate_keys)\n 1252 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:516(_inc_counter)\n 5 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1179(_setup_for_generate)\n 1384 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1295(_fallback_getattr)\n 21 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/iostream.py:259(schedule)\n 544 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4144(_set_parent)\n 14 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4439(__init__)\n 237 0.000 0.000 0.001 0.000 {method 'discard' of 'set' objects}\n 74 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1548(base_columns)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4650(_check_constraint_query)\n 1738 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:173(_get_table_key)\n 792 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/row.py:156(_mapping)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/array.py:103(__init__)\n 60 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:509(__init__)\n 34 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1137(scalars)\n 60 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:267(__init__)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:441(execute)\n 7 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/iostream.py:655(write)\n 12 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:6679(_maybe_cast_listlike_indexer)\n 1705 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1578()\n 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1692()\n 3315 0.001 0.000 0.001 0.000 {method 'startswith' of 'str' objects}\n 5 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:78(instances)\n 4170 0.001 0.000 0.001 0.000 {built-in method builtins.hash}\n 440/424 0.000 0.000 0.001 0.000 {method 'extend' of 'list' objects}\n 8 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4614(_get_item_cache)\n 150 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:2128(_fetchone_impl)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:535(generate_dispatch)\n 34 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1706(__init__)\n 34 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:804()\n3435/3431 0.001 0.000 0.001 0.000 {built-in method builtins.setattr}\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:548(_generate_dispatcher)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2901(__init__)\n 42 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5131()\n 32 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2594(_make_proxy)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2039(_connection_for_bind)\n 2990 0.001 0.000 0.001 0.000 {method 'group' of 're.Match' objects}\n 48 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5229(_set_parent)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3251(_pg_class_filter_scope_schema)\n 5 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:4323(reindex)\n 890 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1740(set_creation_order)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:450(operate)\n 32 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1403(constructor_copy)\n 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2427(visit_grouping)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/properties.py:475(operate)\n 39 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:747(_literal_coercion)\n 36 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2909()\n 566 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2233(_extra_kwargs)\n 30 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:253(_reduce)\n 166 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:407(__iter__)\n 21 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/zmq/sugar/socket.py:621(send)\n 10 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:3256(connect)\n 148 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:223(split)\n 180 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1772(as_readonly)\n 376 0.000 0.000 0.001 0.000 {method 'update' of 'set' objects}\n 7 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/iostream.py:577(_schedule_flush)\n 22/8 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:838(in_)\n 10 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:131(__init__)\n 24 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/api.py:28(_event_key)\n 63 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1314(fetchall)\n 176 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:218(get_converter)\n 14/8 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2099(in_op)\n 18/16 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:216(_copy_internals)\n 3 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2735(setup_compile_state)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4596(_comment_query)\n 1005 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1689(isEnabledFor)\n 459 0.001 0.000 0.001 0.000 {built-in method _operator.or_}\n 2759 0.001 0.000 0.001 0.000 {method 'items' of 'dict' objects}\n 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:6956(insert)\n 15 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4596(_box_col_values)\n 176 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7685(format_column)\n 12 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4616(_normalize_froms)\n 7 0.001 0.000 0.001 0.000 {built-in method builtins.exec}\n 3 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:744(_setup_entity_query)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:951(__call__)\n 33 0.000 0.000 0.001 0.000 :1(order_by)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/reshape/concat.py:157(concat)\n 41 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6235(_all_selected_columns)\n 18 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6281(__getattr__)\n 264 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1186(mappings)\n 306 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:679(driver_connection)\n 599 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:271(inner)\n 10/8 0.000 0.000 0.001 0.000 :1(_connection_for_bind)\n 56 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:2082(update)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:337(_exec_code_in_env)\n 352 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:1969(process)\n 60 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:6678(visit_drop_table)\n 566 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4286()\n 178 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2135(__hash__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:113()\n 98 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:188(match)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3283(_has_table_query)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1384(_checkin)\n 270 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3671(_handle_array_type)\n 1831 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:589(append)\n 78 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1757(get_result_processor)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:1305(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6429(dtypes)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4592(_get_froms)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1012(iget)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:631(orm_pre_session_exec)\n 506 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/string.py:258(parse)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1552(proxy_set)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:148(_generate_cache_attrs)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3648(visit_bindparam)\n 682 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:2289(to_instance)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2689(row_processor)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:157(__init__)\n 10/8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1107(_connection_for_bind)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/default_comparator.py:212(_in_impl)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:369(__eq__)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3131(_set_target_column)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:358(append_to_list)\n 1 0.000 0.000 0.000 0.000 :1002(_find_and_load)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:917(_finalize_fairy)\n 22 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:1035(setup)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2778()\n 68 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6298(__setattr__)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:6394(_should_compare)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3863(_table_oids_query)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2780()\n 842 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:832(columns)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/reshape/concat.py:622(get_result)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:811(_instance_processor)\n 144 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1776(_bind_processors)\n 124 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:794(_merge_cols_by_none)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3024(_column_tokens)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1507(close)\n 31 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:289(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:1547(itertuples)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:2648(visit_ARRAY)\n 972 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1364(__init__)\n 90/40 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:423(get_cls_kwargs)\n 1 0.000 0.000 0.000 0.000 :967(_find_and_load_unlocked)\n 180 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2095(__init__)\n 122 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:737(_generate)\n 31 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2583(visit_column)\n 51 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5940(where)\n 17 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1157(maybe_infer_to_datetimelike)\n 302 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:911(foreign_key_constraints)\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:994(_get_context_loader)\n 180 0.000 0.000 0.000 0.000 {method 'match' of 're.Pattern' objects}\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:3281(raw_connection)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4140(order_by)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:120(_stored_in_collection)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2525(visit_label)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:668(__init__)\n2038/2035 0.000 0.000 0.000 0.000 {built-in method builtins.iter}\n 104 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5279(__new__)\n 78 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:916(_cached_result_processor)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:332(for_modify)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1550(makeRecord)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:5346(reindex)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_compile.py:783(compile)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:5340(reindex)\n 95 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2134(_gen_cache_key)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:1618()\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:441(connect)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1081(is_numeric_dtype)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:282(__init__)\n 1291 0.000 0.000 0.000 0.000 {method 'pop' of 'list' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3449(intersection)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1255(_checkout)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:335(_accept_with)\n 978 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1064(soft_close)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:986(_gen_dialect_impl)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:252(visit_clauseelement)\n 47/34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:786(_getitem)\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6233(__finalize__)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:234(contextmanager)\n 78 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1313(oneshot)\n 41 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:925(traverse)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:5611(_reindex_axes)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3051(_set_parent_with_dispatch)\n 148 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_collections_abc.py:760(get)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:552(__get__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:640(execute)\n 180 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:77()\n 60 0.000 0.000 0.000 0.000 {built-in method posix.urandom}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:40(save_obj)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:844(visit_setup_join_tuple)\n 151 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:1375()\n 90 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:847(__init__)\n 1838 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/sql.py:191(as_string)\n 264 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:862(__contains__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/writeonly.py:179(get)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:124(_annotate)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:516(run_generated_dispatch)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/numeric.py:274(full)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/writeonly.py:504(__init__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/_utils.py:23(to_numpy_dtype_inference)\n 88 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4034(get_multi_pk_constraint)\n 60 0.000 0.000 0.000 0.000 {built-in method _abc._abc_init}\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:852()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:674(_with_infer)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:269(_as_annotated_instance)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:339(_from_mgr)\n 41 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4772(all_selected_columns)\n 296 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/generic.py:42(_instancecheck)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5437(_can_hold_identifiers_and_holds_name)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:707(checkout)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2876(query)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:672(_constructor_sliced_from_mgr)\n 822 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:436(__getitem__)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:1006(copy)\n 270 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1642()\n 124 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:648(_colnames_from_description)\n 272 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:844(__init__)\n 69 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4179()\n 204 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:252(_init_connection)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:235(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:389(_generate_actions)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/relationships.py:1151(_with_parent)\n 41 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:873(traverse_using)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:580(type_descriptor)\n 270 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:2052(quoted_token_parser)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/relationships.py:1171(_optimized_compare)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:577()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5281(__repr__)\n 300 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:607(validate_identifier)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:231(asarray_tuplesafe)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:957(_validate_tuple_indexer)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:281(_set_entities)\n 1 0.000 0.000 0.000 0.000 :659(_load_unlocked)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4744(_validate_dest_table)\n 238 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:214(schema_for_object)\n 672 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:537(__init__)\n 82 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/pg_catalog.py:50(process)\n 150 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1555(values)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:982(cloned_traverse)\n 1372 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1746(pre_exec)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:518(execute)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5287()\n 165 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:524(dialect_kwargs)\n 55 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1434(_is_dtype_type)\n 197 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2658(post_process_text)\n 3/1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:1035(clone)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:410(_deep_annotate)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:173()\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:761(__missing__)\n 3/1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:433(clone)\n 144 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:386()\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:2313(is_unique)\n 176 0.000 0.000 0.000 0.000 {built-in method builtins.any}\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:127()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:949(_do_pre_synchronize_auto)\n 540 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1215(_reset_memoizations)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2260(__repr__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:842(_engine)\n 234 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:369(_key)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:455(__init__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:916(format)\n 88 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1143(_reset)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3955(_get_indexer)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3095(_link_to_col_by_colstring)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:278(__init__)\n 17 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:2744(inferred_type)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:538(__init__)\n 178 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:159(_insert_item)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1445(__init__)\n 1556 0.000 0.000 0.000 0.000 {method 'isdigit' of 'str' objects}\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:751(checkin)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:534()\n 15 0.000 0.000 0.000 0.000 :1(join)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:2301(adapt_type)\n 402 0.000 0.000 0.000 0.000 {method 'difference' of 'set' objects}\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:650(format)\n 166 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_collections_abc.py:775(keys)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3535(_intersection)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4494(_tq_label)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:1484(items)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:5660(_reindex_with_indexers)\n 1056 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:787(name)\n 122 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:936(__init__)\n 144 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:404()\n 258 0.000 0.000 0.000 0.000 {method 'split' of 're.Pattern' objects}\n 264 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1993(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:944(parse)\n 88 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:866(_instantiate_types)\n 88 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1111(get_multi_table_options)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:683(__init__)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:1001(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:301(_engine_insp)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:287()\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1596(pandas_dtype)\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:649(_simple_new)\n 1028 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1267(memo)\n 1 0.000 0.000 0.000 0.000 :844(exec_module)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4894(_gen_tq_label)\n 306 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1641(no_parameters)\n 160 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7457(quote_schema)\n 120 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:426(__init__)\n 704 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3649()\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1574(_validate_key)\n 29 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:97(is_bool_indexer)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:245(_init_engine)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/functools.py:35(update_wrapper)\n 84 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:816(iterate)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:902(_sorted_constraints)\n 42 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:246(_select_iterables)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2526(to_compile_state)\n 144 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1784()\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4203()\n 147 0.000 0.000 0.000 0.000 :398(parent)\n 376 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1987()\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:1146(take)\n 352 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:1939(_strict_as_bool)\n 4/2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:436(_parse_sub)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:451(_return_conn)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:4060(_memo)\n 61 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4425(__len__)\n 196/188 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:797()\n 122 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:146(__new__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/reshape/concat.py:699(new_axes)\n 443 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1820(load_dialect_impl)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/reshape/concat.py:702()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2253(_fetchone_impl)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1083(_remove_snapshot)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5552(equals)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:1955(filter_by)\n 62 0.000 0.000 0.000 0.000 {built-in method builtins.all}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:1052(_do_pre_synchronize_fetch)\n 4/2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:494(_parse)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:669(_sliced_from_mgr)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:6520(_transform_index)\n 29 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:6312(_index_as_unique)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/impl.py:144(_do_return_conn)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:449(__init__)\n 1 0.000 0.000 0.000 0.000 :916(get_code)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_elements_constructors.py:185(and_)\n 88 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3640()\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4026(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3569(_intersection_via_get_indexer)\n 440 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3472(_prepare_filter_names)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3124(and_)\n 296 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/generic.py:37(_check)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:491(__call__)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4656()\n 88 0.000 0.000 0.000 0.000 :2(__init__)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7289(__init__)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4745()\n 188 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1438(self_group)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5132(returning_clause)\n 296 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:2673(_unquote_identifier)\n 264 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1667(_fetchiter_impl)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:533(_new_annotation_type)\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1375(_is_dtype)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/reshape/concat.py:717(_get_concat_axis)\n 176 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:6885(get_column_default_string)\n 98 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2454(is_boolean)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/impl.py:153(_do_get)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:728(_emit_update_statements)\n 382 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:93()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3021(_construct)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2712(__init__)\n 249 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:340(__init__)\n 78 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5154(_memoized_method_lower)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:423(dict_to_mgr)\n 64 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:157(split_batch_into_tables)\n 974 0.000 0.000 0.000 0.000 {built-in method builtins.callable}\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:132(put)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:870(_post_coercion)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5448(outerjoin)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5269(join)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:1983()\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2978(_process_clauses_for_boolean)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:274(make_block)\n 540 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:700(visitor_iterator)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:549(find)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5323(__contains__)\n 144 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1373()\n 266 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1570(__bool__)\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:137(is_object_dtype)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:798(begin)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:180(blknos)\n 332 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:570(connection)\n 35 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2093(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1670(_fetchone_impl)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2117(_clone)\n 280 0.000 0.000 0.000 0.000 {method 'endswith' of 'str' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3078(visit_unary)\n 87 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/abc.py:117(__instancecheck__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:6219(_raise_if_missing)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:659(_constructor_from_mgr)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:994(_static_cache_key)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5170(_get_engine_target)\n 172 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:426(__init__)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1814(_autobegin_t)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4776(_setup_joins)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3480(_generate_generic_unary_operator)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:463(orm_pre_session_exec)\n 176 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:659(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/lib/function_base.py:5369(insert)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5140()\n 264 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:652(visit_string_list)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2760(_generate_delimited_list)\n 270 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1240()\n 19 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:103(_collection_gced)\n 324 0.000 0.000 0.000 0.000 {built-in method time.perf_counter}\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:128()\n 57 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3775(_resolve_value_to_type)\n 120 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:114(ignored_name)\n 88 0.000 0.000 0.000 0.000 {method 'throw' of 'generator' objects}\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:880(__init__)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/base.py:82(shape)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1593(__getitem__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:1093(adapt)\n 224 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:408()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4411(_label_returning_column)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1277(is_extension_array_dtype)\n 150 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1559()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:1829(create_for_statement)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:1720(visit_array)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2761()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:553(_statement_20)\n 36 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5373(__getitem__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1436(adapt)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4976()\n 156 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4220()\n 506 0.000 0.000 0.000 0.000 {built-in method _string.formatter_parser}\n 31 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:405(_clone)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:6415(_is_comparable_dtype)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2787(visit_clauselist)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2763()\n 150 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:434(_ensure_has_table_connection)\n 146 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5131(construct)\n 121 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1573(__len__)\n 88 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1146(reset)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:174(get)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2679(__init__)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_elements_constructors.py:459(bindparam)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_compile.py:622(_code)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/uuid.py:333(hex)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_elements_constructors.py:719(case)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3019()\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:86(_annotations_cache_key)\n 128 0.000 0.000 0.000 0.000 {method 'getquoted' of 'psycopg2.extensions.QuotedString' objects}\n 228 0.000 0.000 0.000 0.000 {method 'difference_update' of 'set' objects}\n 192 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1615(__getattr__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:749(update_stmt)\n 1 0.000 0.000 0.000 0.000 :901(_find_spec)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:582(formatTime)\n 7 0.000 0.000 0.000 0.000 :1(select_from)\n 41 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:689(get_plugin_class)\n 15 0.000 0.000 0.000 0.000 {method 'reduce' of 'numpy.ufunc' objects}\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2847(__clause_element__)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2808(self_group)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3296(__init__)\n 82 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/pg_catalog.py:53()\n 144 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:544(__len__)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:234(__init__)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:7688(maybe_extract_name)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_elements_constructors.py:248(any_)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7567(format_label)\n 88 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1939(_reflect_table_comment)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:92(_gen_annotations_cache_key)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:2765(check_dict_or_set_indexers)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:245(get_attribute_history)\n 188 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/inspect.py:81(ismethod)\n 84 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:798(_post_coercion)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1398(_reset)\n 180 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:912(__str__)\n 88 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/interfaces.py:1689(get_table_options)\n 376 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1996()\n 266 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4933()\n 142 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:412(_gen_cache_key)\n 159 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1786()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4835(_join_determine_implicit_left_side)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:677(execute)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2615(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:586()\n 176 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4000()\n 376 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1989()\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2728()\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1133(is_alive)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:168(delete_obj)\n 110 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2431(is_comparison)\n 17 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/inference.py:273(is_dict_like)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1586(_simple_statement)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3923(bindparam_string)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:766(visit_clauseelement_tuples)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1514(findCaller)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3719(_create_any)\n 302 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:930()\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:6324(_maybe_downcast_for_indexing)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1412(adapt_to_emulated)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:699(visit_has_cache_key_tuples)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:583(copy_func)\n 36 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7125(visit_VARCHAR)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:468(presort_saves)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexers/utils.py:239(maybe_convert_indices)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1777(first)\n 148 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:539(_implicit_coercions)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:299(generate)\n 31 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2558(is_precedent)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1512(_close_special)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3777(get_loc)\n 19 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4555(go)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:711()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2332(_soft_close)\n 130 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:909(__len__)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:895(visit_plain_dict)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1802(one)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:1052(create_row_processor)\n 60 0.000 0.000 0.000 0.000 {method '__exit__' of 'psycopg2.extensions.cursor' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/array.py:364(result_processor)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/reshape/concat.py:405(__init__)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1667(_validate_integer)\n 37 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:388(_inspect_func_args)\n 87 0.000 0.000 0.000 0.000 {built-in method _abc._abc_instancecheck}\n 204 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:351()\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:712()\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2701(_connection_begin_impl)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:536(is_string_dtype)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:380(__clause_element__)\n 150 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:805(__init__)\n 188 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:788()\n 188 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/inspect.py:159(isfunction)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3899(_truncated_identifier)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2116(type)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:237(__exit__)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3886(_truncate_bindparam)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2937(_get_colspec)\n 88 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/__init__.py:34(using_copy_on_write)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:458(__enter__)\n 1 0.000 0.000 0.000 0.000 :1415(find_spec)\n 4/2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:1034(is_not)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2990(_table_key)\n 156 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4222()\n 39 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:585(_get_axis)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1861(from_array)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4874(_setup_select_stack)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:465(__getattr__)\n 1 0.000 0.000 0.000 0.000 :1383(_get_spec)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:616(_literal_coercion)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:1010(view)\n 42 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:254()\n 113 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:181(__init__)\n 88 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4132(table)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:339(dispatch_is)\n 6/2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_compile.py:87(_compile)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/json/__init__.py:183(dumps)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3931(_from_objects)\n 166 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_collections_abc.py:802(__init__)\n 3 0.000 0.000 0.000 0.000 :1(filter)\n 1 0.000 0.000 0.000 0.000 :645(_compile_bytecode)\n 165 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:366(__init__)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:286(__init__)\n 188 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/inspect.py:261(iscode)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2005(is_not)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/relationships.py:1217(visit_bindparam)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:806(_set_axis)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1066(iset)\n 45 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/base.py:84()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:106(get_dialect_kwargs)\n 88 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1701()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:147(__init__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4606()\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:836(__iter__)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/array.py:262(__init__)\n 1 0.000 0.000 0.000 0.000 :1514(find_spec)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:510(_validate_dtype)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:306(register_object)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2716(new_block)\n 4 0.000 0.000 0.000 0.000 {method 'sum' of 'numpy.ndarray' objects}\n 1 0.000 0.000 0.000 0.000 {built-in method marshal.loads}\n 17 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_dtype.py:346(_name_get)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/relationships.py:1239(_get_attr_w_warn_on_none)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1739(_connections_for_states)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1780(_consolidate_inplace)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2063(effective_value)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:664(_constructor_from_mgr)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/to_file_like_obj.py:25(FileLikeObj)\n 1 0.000 0.000 0.000 0.000 :1(with_only_columns)\n 22 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:827(_values)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1693(label)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1652(_is_scalar_access)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/relationships.py:1297(_go)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:6664()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/json/encoder.py:182(encode)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:528(__init__)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1094(_begin_impl)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:287(get_dtypes)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5995(select_from)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/writeonly.py:587(select)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1762(is_consolidated)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2902(_for_columns)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:555(_initialize_instance)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2645(maybe_coerce_values)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:145(_get_option)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:862(_metadata_for_keys)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:808(__len__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:346(_per_mapper_flush_actions)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1732()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:694(_expire)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1029(_take_snapshot)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5215(visit_table)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1404(_emit_delete_statements)\n 168 0.000 0.000 0.000 0.000 {method 'union' of 'set' objects}\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:262(__init__)\n 67 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:455(__contains__)\n 39 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:508(dispatch)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:458(get_children)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:130(filterwarnings)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2517(get_property_by_column)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5263(visit_join)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5841(with_only_columns)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:920(_eval_condition_from_statement)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:234(prop_has_changes)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:203(find_left_clause_to_join_from)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1041(_text_coercion)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:96(arrays_to_mgr)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:214(is_extension)\n 123 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/topological.py:54()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:327(_memoized_attr_expression)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_methods.py:47(_sum)\n 177 0.000 0.000 0.000 0.000 {method 'copy' of 'dict' objects}\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:1462(_set_as_cached)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1770(_consolidate_check)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4226()\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:165(__setitem__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4593(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1415(delete_stmt)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1412(_insert_update_blklocs_and_blknos)\n 36 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7109(_render_string_type)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:786(__add__)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/array_algos/take.py:564(_take_preprocess_indexer_and_fill_value)\n 49 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1579(__get__)\n 270 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/_json.py:159(typecast_json)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:3105(__init__)\n 306 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1029(get_driver_connection)\n 100 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2087()\n 122 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:865(__init__)\n 200 0.000 0.000 0.000 0.000 {method 'get' of 'mappingproxy' objects}\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2233(_soft_close)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2111(__eq__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/numeric.py:1393(moveaxis)\n 1 0.000 0.000 0.000 0.000 :1(limit)\n 156 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4253()\n 152 0.000 0.000 0.000 0.000 {method 'rpartition' of 'str' objects}\n 176 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4893()\n 36 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:957(_post_coercion)\n 150 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1069(hard_close)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1484(dictlike_iteritems)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:713(warn)\n 4/1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/evaluator.py:64(process)\n 86 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2473(is_associative)\n 97 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:389(__bool__)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2811(ensure_block_shape)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:211(_organize_states_for_save)\n 100 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2085()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:773(_view)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:1872(__init__)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:418(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:1036(shape)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:882(safe_merge)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/flags.py:51(__init__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1261(set)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3587(visit_not_like_op_binary)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/range.py:999(__len__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:398(_safe_annotate)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:219(_can_consolidate)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:2229(is_monotonic_increasing)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/evaluator.py:161(visit_binary)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:257(_adjust_fn_spec)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/decl_api.py:1867(_inspect_decl_meta)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2703(new_block_2d)\n 1 0.000 0.000 0.000 0.000 :1036(get_data)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:4009(_sorted_tables)\n 113 0.000 0.000 0.000 0.000 {method 'clear' of 'dict' objects}\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1036(coerce_compared_value)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/decl_base.py:2126(_declarative_constructor)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:974(_is_nested_tuple_indexer)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4230()\n 148 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1550()\n 88 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4422()\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6031()\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4391(_add_to_result_map)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5213(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:832(__getitem__)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/algorithms.py:1131(take)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:2580(limit)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1022(_literal_coercion)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:256(__enter__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5913()\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:1914(_set_table)\n 2/1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:623(__gt__)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3583(__init__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/json/encoder.py:204(iterencode)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3573(_get_state_attr_by_column)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2104(__repr__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4647()\n 40 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/inference.py:334(is_hashable)\n 178 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2136()\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/flags.py:87(allows_duplicate_labels)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:572(condition)\n 144 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1372()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:1404(_offset_or_limit_clause)\n 100 0.000 0.000 0.000 0.000 {built-in method from_iterable}\n 1 0.000 0.000 0.000 0.000 :558(module_from_spec)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:468(finalize_flush_changes)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:3189(setup_compile_state)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2795(extend_blocks)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1424(_next)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:464(_cloned_set)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:236(_from_objects)\n 1 0.000 0.000 0.000 0.000 {built-in method _operator.gt}\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:181(_add_filter)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1776()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:417(to_list)\n 206 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:92()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:973(_gen_cache_key_inst)\n 53 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:97()\n 74 0.000 0.000 0.000 0.000 {method 'union' of 'frozenset' objects}\n 22 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2002(internal_values)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:676(lint)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/lib/function_base.py:5563(append)\n 2/1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:584(__ne__)\n 128 0.000 0.000 0.000 0.000 {method 'decode' of 'bytes' objects}\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2112(_with_binary_element_type)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:228(__init__)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:43(__init__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/posixpath.py:140(basename)\n 102 0.000 0.000 0.000 0.000 {method 'groups' of 're.Match' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:793(_set_axis_nocheck)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:325(_subx)\n 82 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1411()\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:187(_join)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/writeonly.py:374(get_history)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/typing_extensions.py:582(__instancecheck__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:1172(sort_tables)\n 63 0.000 0.000 0.000 0.000 {built-in method from_bytes}\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:464(__eq__)\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1550(keys)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3989(_set_parent)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:693(_sanitize_ndim)\n 7 0.000 0.000 0.000 0.000 {built-in method time.localtime}\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2171(name)\n 69 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:388()\n 1 0.000 0.000 0.000 0.000 {built-in method _operator.ne}\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:495()\n 7 0.000 0.000 0.000 0.000 /home/theia/company-matching/cmf/data/results.py:58(metadata)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/posixpath.py:117(splitext)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:165(simplefilter)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:255(get)\n 8 0.000 0.000 0.000 0.000 {method 'any' of 'numpy.ndarray' objects}\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:515(_inspect_mapped_class)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:6253(delete_table_clause)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1402(_insert_update_mgr_locs)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2365(shape)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:536(__set__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2228(construct_from_string)\n 151 0.000 0.000 0.000 0.000 {method 'release' of '_thread.lock' objects}\n 1 0.000 0.000 0.000 0.000 :486(_init_module_attrs)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:749(__repr__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pydantic/main.py:737(__getattr__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:236(set_axis)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1487(__getattr__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1606(_select_statement)\n 148 0.000 0.000 0.000 0.000 {built-in method builtins.issubclass}\n 90 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:906()\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:106(remove)\n 176 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:6633()\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1162(_getter)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:481(ensure_wrapped_if_datetimelike)\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1399(_get_dtype)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1122()\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:197(_validate_ndim)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/records.py:435(__array_finalize__)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:900(_cached_bind_processor)\n 62 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:470()\n 47 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/inference.py:300()\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3110(_construct_raw)\n 120 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:306(_should_execute)\n 47 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3719(__init__)\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1331(is_ea_or_datetimelike_dtype)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/evaluator.py:87(visit_column)\n 150 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:268()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1072(_literal_coercion)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2119(create_block_manager_from_column_arrays)\n 118 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5144(_values)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3275(_register_persistent)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3996(_check_indexing_method)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:127(_get_single_key)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:296(_annotate)\n 92 0.000 0.000 0.000 0.000 {method 'lower' of 'str' objects}\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:948(from_blocks)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:634(formatMessage)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1181()\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1066(_wait_for_tstate_lock)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_elements_constructors.py:849(cast)\n 1 0.000 0.000 0.000 0.000 :220(_call_with_frames_removed)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/functools.py:65(wraps)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1466(assert_arg_type)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:551(maybe_promote)\n 17 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:2776(_is_multi)\n 7 0.000 0.000 0.000 0.000 {built-in method time.strftime}\n 128 0.000 0.000 0.000 0.000 {method 'prepare' of 'psycopg2.extensions.QuotedString' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:1713(__init__)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py:203(setup_query)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1765(_sort_states)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2871(select_identity_token)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3054()\n 3 0.000 0.000 0.000 0.000 {method 'max' of 'numpy.ndarray' objects}\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/properties.py:328(merge)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:780(name)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:80(_memoized_attr_ref)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:469(keys)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:1631(__len__)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:437(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1265(_iset_single)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:666(_info_axis)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/typing.py:310(is_non_string_iterable)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2674(get_block_type)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3064(_row_limit_clause)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:259(__exit__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_methods.py:55(_any)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:377(__init__)\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:428(__setitem__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/methods/to_dict.py:1()\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:2738(is_label_like)\n 124 0.000 0.000 0.000 0.000 {method 'isdisjoint' of 'set' objects}\n 41 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4774()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:841(copy_with)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:351(notify)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2188(_form_blocks)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3391(__init__)\n 3 0.000 0.000 0.000 0.000 :135(_path_stat)\n 124 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:991(soft_close)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/log.py:101(_should_log_debug)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2985(_autoflush)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5167(_find_columns)\n 121 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:1332()\n 44 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:220(_resolve_for_literal)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3514(scalar_subquery)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:628(usesTime)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:160(set)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:1883(limit_clause)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5398(apply_map)\n 2 0.000 0.000 0.000 0.000 :1(connection)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:262()\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4611(_clear_item_cache)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_dml_constructors.py:116(delete)\n 2/1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:187(__invert__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3684(_from_objects)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5111(_create_raw_select)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:368(__init__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:907(from_execution_options)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:166(_getter)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1178(__init__)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/array.py:152(self_group)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:637(_pks_changed)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2303(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_methods.py:39(_amax)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:739(__init__)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2372(iget)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2234(_tuples_to_blocks_no_consolidate)\n 17 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:609(_dtype_to_subclass)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/numeric.py:1330(normalize_axis_tuple)\n 1 0.000 0.000 0.000 0.000 :156(__enter__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:432(format)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:427(_collect_update_commands)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:205(_make_extra_froms)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3498(_identity_key_from_state)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3070(_get_operator_dispatch)\n 1 0.000 0.000 0.000 0.000 {built-in method io.open_code}\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:477(__exit__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_compile.py:560(_compile_info)\n 1 0.000 0.000 0.000 0.000 :1(return_defaults)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:1067()\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:242()\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:115(__eq__)\n 3 0.000 0.000 0.000 0.000 {built-in method posix.stat}\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/writeonly.py:392(_get_collection_history)\n 31 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:484()\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:6657()\n 1 0.000 0.000 0.000 0.000 {built-in method _operator.inv}\n 148 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:545(_literal_coercion)\n 65 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:794(dtype)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1492(__getattr__)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:819(get_connection)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2236()\n 67 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:2064(table_comment)\n 2 0.000 0.000 0.000 0.000 :361(cache_from_source)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:372(apply_if_callable)\n 50 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:571(_get_axis_number)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/missing.py:466(array_equivalent)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1175(_tuple_getter)\n 96 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:354(_listen_fn)\n 17 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:591(_ensure_array)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:303(_organize_states_for_delete)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:331(filter)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:726(alias)\n 39 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:366(__hash__)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:7723(_unpack_nested_dtype)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4600()\n 22 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1835(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:1086(skip_for_returning)\n 13 0.000 0.000 0.000 0.000 {method 'take' of 'numpy.ndarray' objects}\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:946(__init__)\n 36 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:152(cast_scalar_indexer)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:1181(_path_registry)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3699(self_group)\n 86 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:2056(check_constraints)\n 6 0.000 0.000 0.000 0.000 {built-in method builtins.sum}\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:896(acquire)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4872()\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:791(is_)\n 22 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:416(extract_array)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4055(_from_objects)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:252(_key)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:116()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:1865(filter)\n 2 0.000 0.000 0.000 0.000 :385(cached)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5228(_with_annotations)\n 134 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:200(_copy_internals)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/base.py:86(_validate_set_axis)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/reshape/concat.py:545(_get_sample_object)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7060(visit_NUMERIC)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:456(_engine_type)\n 90 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:900()\n 133 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:197(_clone)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:346(_state_constructor)\n 46 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:165(__getitem__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/crud.py:114(_get_crud_params)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6628()\n 7/3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:175(getwidth)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4308(_is_clean)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/default_comparator.py:254(_inv_impl)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:352(_clone)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4573(_ensure_valid_index)\n 48 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:340()\n 2 0.000 0.000 0.000 0.000 :1(correlate)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:301(maybe_iterable_to_list)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:4399(_check_setitem_copy)\n 84 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:442(__setitem__)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:732(_sanitize_str_dtypes)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:794(_autobegin)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3313()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1600(_construct)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:160()\n 1 0.000 0.000 0.000 0.000 :491(_get_cached)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:935(_expand_ellipsis)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/reshape/concat.py:707(_get_comb_axis)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:493(_mappers)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/range.py:201(_simple_new)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:315(_compile_repl)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:268(_acquire_restore)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:528(_new_state_if_none)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:730(name)\n 36 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:831(_reset_identity)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:984(connection)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:176(_row_as_tuple_getter)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/sync.py:126(source_modified)\n 42 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3760(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/reshape/concat.py:567()\n 96 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:2645(visit_BYTEA)\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:123()\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4545(_column_naming_convention)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4026(__init__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1571(validate_all_hashable)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/range.py:483(_view)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2300(_is_numeric)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/reshape/concat.py:480(_get_ndims)\n 14 0.000 0.000 0.000 0.000 {built-in method posix.getpid}\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:285(visit_string_clauseelement_dict)\n 1 0.000 0.000 0.000 0.000 :166(_get_module_lock)\n 35 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:455(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:322(per_mapper)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/ops/common.py:81(get_op_result_name)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/genericpath.py:121(_splitext)\n 90 0.000 0.000 0.000 0.000 {method 'pop' of 'set' objects}\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:256(with_wrapper)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:429(_format)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:213(_init_global_attributes)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4422(__iter__)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/__init__.py:55(using_pyarrow_string_dtype)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:301(_with_annotations)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:4436(_wrap_reindex_result)\n 29 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:561()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2934(_from_objects)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:529(is_string_or_object_np_dtype)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:981(parse_template)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:635(_get_root)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:711(_get_plugin_class_for_plugin)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1358(current_thread)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3970(__init__)\n 29 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:234(__next)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:986(__init__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:459()\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/array_algos/take.py:325(_get_take_nd_function)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:421(usesTime)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4479(_tq_key_label)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:909()\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:858(_modified_event)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:156(_adjust_fn_spec)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1040(needs_i8_conversion)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5117(order_by_clause)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3565(_wrap_intersection_result)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:184(is_duration)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:977(__and__)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/iostream.py:138(_event_pipe)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/missing.py:728(is_valid_na_for_dtype)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:348(__new__)\n 6 0.000 0.000 0.000 0.000 :121(_path_join)\n 58 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1072(_effective_plugin_target)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2215(construct_array_type)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1563()\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:225(_full)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2310(_select_args)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:131()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3029(update)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:981()\n 41 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:333(_de_clone)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:596(_homogenize)\n 68 0.000 0.000 0.000 0.000 {method 'append' of 'collections.deque' objects}\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1695()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3161(relationships)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/log.py:104(_should_log_info)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_internal.py:920(npy_ctypes_check)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:1305(construct_from_string)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2924(_polymorphic_properties)\n 43 0.000 0.000 0.000 0.000 {method 'intersection' of 'set' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3439(_wrap_setop_result)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2309(_fast_count_smallints)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:831(construct_from_string)\n 7 0.000 0.000 0.000 0.000 /home/theia/company-matching/cmf/data/results.py:83(_get_results_type)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:363(ndim)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1270(is_1d_only_ea_dtype)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:1066(expand_template)\n 74 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1309(_proxies)\n 65 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:404(flags)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:505(get_rename_function)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:1422(where)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:830(__add__)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:394(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/api.py:386(default_index)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/missing.py:564(_array_equivalent_object)\n 3 0.000 0.000 0.000 0.000 /home/theia/company-matching/cmf/data/utils/db.py:20(get_schema_table_names)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:962(_emit_insert_statements)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_weakrefset.py:27(__exit__)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/records.py:441(__getattribute__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:785(values)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:903(release)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:1061()\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:836(_index_for_key)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:903()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2178(numpy_dtype)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/writeonly.py:85(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2158(_entity_namespace_key)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1612(_init)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:775(__init__)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:718(dtype)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py:1152(create_row_processor)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1638(_soft_close)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5295(_validate_fill_value)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:310()\n 70 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1544(_select_iterable)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:591(_get_block_manager_axis)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pydantic/fields.py:843(__getattr__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6469(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:341(opt_manager_of_class)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1598(_proxy_key)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/iostream.py:505(parent_header)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:535(_still_open_and_dbapi_connection_is_valid)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:999()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:383(object_mapper)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:443(_column_naming_convention)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:346(shape)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:827(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3209(_filter_properties)\n 1 0.000 0.000 0.000 0.000 :1(_begin)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6039(correlate)\n 17 0.000 0.000 0.000 0.000 :1()\n 22 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:196(blklocs)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2888(selectable)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:322(_expand_cloned)\n 7 0.000 0.000 0.000 0.000 {method 'remove' of 'list' objects}\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:437(_append_dedupe_col_collection)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:763(_try_cast)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:459(return_defaults)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:6666()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:292(make_block_same_class)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2318(_preprocess_slice_or_indexer)\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:121(classes)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/reshape/concat.py:494(_clean_keys_and_objs)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:228(_put)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:1527(__init__)\n 20 0.000 0.000 0.000 0.000 {method '__enter__' of '_thread.RLock' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/api.py:72(get_objs_combined_axis)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2689(_deactivate_from_connection)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3035(unique)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/iostream.py:550(_is_master_process)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:713(__setattr__)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:264()\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:231(_get)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7429(quote_identifier)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:358(getMessage)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1848(from_blocks)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:4440(_maybe_preserve_names)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:5595()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/fromnumeric.py:1768(ravel)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2283(null_result)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/flags.py:55(allows_duplicate_labels)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexers/utils.py:62(is_list_like_indexer)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1998(external_values)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:321()\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1225(__init__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/posixpath.py:52(normcase)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/inference.py:195(is_array_like)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:64(is_integer)\n 4 0.000 0.000 0.000 0.000 {method 'reshape' of 'numpy.ndarray' objects}\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:6879(create_table_suffix)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3766(_convert_can_do_setop)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:457(__init__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:119(getLevelName)\n 38 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:2044(foreign_keys)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:2772()\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1392()\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:362(attrs)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:932(_init_collections)\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1553()\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1180()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:676(_translate_key)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:4376(_set_is_copy)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_compile.py:292(_optimize_charset)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:419()\n 23 0.000 0.000 0.000 0.000 {method 'rfind' of 'str' objects}\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:283(__new__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:161(iloc)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:97(closegroup)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:830(_hasna)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:536(dict)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:901(_post_coercion)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/fromnumeric.py:865(sort)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2936()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:71(per_property_preprocessors)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:589(_has_bind_expression)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:992(_validate_key_length)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:311(is_null_slice)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:1054(construct_from_string)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2964(row_processor)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3836(set_label_style)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:677(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:770(_type_affinity)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:256()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:225(__init__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:353(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1457(_negate)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:791(filter)\n 49 0.000 0.000 0.000 0.000 {built-in method _warnings._filters_mutated}\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2379(as_state)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1424(debug)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/writeonly.py:124(as_history)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5201(__get__)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:473(na_value)\n 6 0.000 0.000 0.000 0.000 :1033(_handle_fromlist)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2972(_iterate_polymorphic_properties)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2139(_entity_namespace)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1665()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:481()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3014(insert)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1477(comparator)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:106()\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:126(_classes_and_not_datetimelike)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/missing.py:101(isna)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:657()\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_typing.py:353(is_quoted_name)\n 1 0.000 0.000 0.000 0.000 :1509(_get_spec)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2357(_adjust_for_extra_criteria)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:257()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:389(standardize_mapping)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/posixpath.py:41(_get_sep)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:979(_commit_all_states)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/relationships.py:1371(merge)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:1253(iget)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:775(infer_dtype_from_scalar)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:695(ndim)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1063(get)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/methods/to_dict.py:155()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:996(_begin)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2643(_should_select_with_poly_adapter)\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:48()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:183()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:418(__len__)\n 6 0.000 0.000 0.000 0.000 :123()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:209(count_not_none)\n 2 0.000 0.000 0.000 0.000 :127(_path_split)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1503(_finalize_insert_update_commands)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6107()\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:568()\n 16 0.000 0.000 0.000 0.000 {method 'acquire' of '_thread.RLock' objects}\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:730(mapper)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1938(_block)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:179(is_timestamp)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:956()\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:2052(unique_constraints)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:290(persistent)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3045(delete)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:2781()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/evaluator.py:75(visit_grouping)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4672(_get_display_froms)\n 17 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_dtype.py:330(_name_includes_bit_suffix)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/ops/common.py:103(_maybe_match_name)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:301(register_preprocessor)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2692(_with_polymorphic_mappers)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7390(_escape_identifier)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:981(_memoized_attr__wildcard_token)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:270(_loader_impls)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/json/__init__.py:299(loads)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_collections_abc.py:925(clear)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:2639(visit_UUID)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:2794(_na_value)\n 41 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:977(_gen_cache_key)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:145(_expression_collection_was_a_list)\n 1 0.000 0.000 0.000 0.000 {method 'read' of '_io.BufferedReader' objects}\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:2048(indexes)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:410(coerce_generator_arg)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:1019(axes)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/records.py:637()\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:226(is_string)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state_changes.py:175(_expect_state)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:309(__iter__)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2632(get_bind)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:837()\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3066()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/crud.py:787(_setup_delete_return_defaults)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:6537()\n 1 0.000 0.000 0.000 0.000 :154(_path_isfile)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1590()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1673(get_history)\n 5 0.000 0.000 0.000 0.000 {built-in method builtins.max}\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:48(_kill)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:792(description)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:249(external_values)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2380(_check_configure)\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:452(_constructor)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:170(get)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:358(__call__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexers/utils.py:371(check_key_length)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:382(states_for_mapper_hierarchy)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:410(visit_string_clauseelement_dict)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:173(append)\n 17 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_dtype.py:24(_kind_name)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:752(_maybe_repeat)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:5646(_needs_reindex_multi)\n 36 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:2199(coerce_compared_value)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:515(_has_column_expression)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:973(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:910(__len__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:795(_adjust_for_extra_criteria)\n 1 0.000 0.000 0.000 0.000 :1(unique)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2525(iterate_properties)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2668(_get_entity_clauses)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_typing.py:349(has_schema_attr)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/missing.py:184(_isna)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:1212(__init__)\n 1 0.000 0.000 0.000 0.000 :560(_classify_pyc)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:996(_literal_coercion)\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:1671(name)\n 33 0.000 0.000 0.000 0.000 {method 'popleft' of 'collections.deque' objects}\n 17 0.000 0.000 0.000 0.000 {built-in method _thread.allocate_lock}\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:342(construct_from_string)\n 7 0.000 0.000 0.000 0.000 {built-in method _weakref._remove_dead_weakref}\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/__init__.py:42(warn_copy_on_write)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:852(_unique_strategy)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:276()\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:685(_sanitize_non_ordered)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3773(__init__)\n 36 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:377(_order_by_label_element)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1458(_is_native_for_emulated)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1242(get_history)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:2637()\n 44 0.000 0.000 0.000 0.000 {method 'isascii' of 'str' objects}\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3045(_set_parent)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:383(_getitem)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:271(_is_owned)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:865()\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:376(__init__)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:324(__init__)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/multiarray.py:1080(copyto)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:265(_release_save)\n 22 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:974(dtype)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:986(_memoized_attr__default_path_loader_key)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_compile.py:447(_simple)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:536(is_set)\n 1 0.000 0.000 0.000 0.000 :696(spec_from_file_location)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3809()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:342(_resolve_for_literal)\n 36 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/range.py:376(dtype)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/inspect.py:73(isclass)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:131(coerce_to_immutabledict)\n 21 0.000 0.000 0.000 0.000 {method '__exit__' of '_thread.lock' objects}\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:540()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2755(_propkey_to_col)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:241(is_single_block)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5140(_scalar_type)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:246(is_mapped)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:798(tolist)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:403()\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/util.py:105(_trans_ctx_check)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4830(get_children)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:161(__len__)\n 1 0.000 0.000 0.000 0.000 :1(options)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:579(_get_axis_name)\n 24 0.000 0.000 0.000 0.000 {built-in method posix.fspath}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3508()\n 1 0.000 0.000 0.000 0.000 :145(_path_is_mode_type)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:388(_commit_removals)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:183(method_is_overridden)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:218(_acquireLock)\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:147(__class_getitem__)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:941()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:137(_type_check)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:112(check_modified)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:1835(construct_from_string)\n 1 0.000 0.000 0.000 0.000 {method '__exit__' of '_io._IOBase' objects}\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2288()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/api.py:120(_get_combined_index)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1093(name)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/numeric.py:1380()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1544(_hide_froms)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:222(_empty)\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2620()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1010(_iterate_self_and_parents)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:855(_indexes_for_keys)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1010(_implicit_coercions)\n 35 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:391(_from_objects)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:289()\n 23 0.000 0.000 0.000 0.000 {built-in method _thread.get_ident}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:316(_attached)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:288()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2827(external_values)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/pandas_compat.py:660(get_datetimetz_type)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:4384(_event_on_init)\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:872(_gen_cache_key)\n 1 0.000 0.000 0.000 0.000 :160(__exit__)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:792(value)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/array.py:107()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:617(_select_options)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:287(tell)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:254(_collection_impl_keys)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:665(_is_dunder)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4856(_from_objects)\n 1 0.000 0.000 0.000 0.000 :1077(path_stats)\n 3 0.000 0.000 0.000 0.000 :79(_unpack_uint32)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2477(_is_orphan)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2222(_gen_static_annotations_cache_key)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:164(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:1249(shape)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:929(__getattr__)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:655(_constructor)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/topological.py:77(find_cycles)\n 20 0.000 0.000 0.000 0.000 {method '_is_owned' of '_thread.RLock' objects}\n 1 0.000 0.000 0.000 0.000 :58(__init__)\n 3 0.000 0.000 0.000 0.000 {method 'nonzero' of 'numpy.ndarray' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2042(_prop_set)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:568(require_length_match)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3059(_from_objects)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:1240(_skip_fn)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/range.py:553(equals)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:236(is_large_string)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:1548(for_context)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5651(identical)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:395(_set_propagate_attrs)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pg_bulk_ingest.py:55(sql_and_copy_from_stdin)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:179(__len__)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:311()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2898(_identity_lookup)\n 1 0.000 0.000 0.000 0.000 :87(acquire)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/IPython/core/displayhook.py:258(__call__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/api.py:102()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3478(_kind_to_relkinds)\n 7 0.000 0.000 0.000 0.000 {built-in method sys._getframe}\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:1898(get_select_precolumns)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:371()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:4001(_compiled_cache)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:569(_return_orm_returning)\n 2 0.000 0.000 0.000 0.000 {method 'extendleft' of 'collections.deque' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:253(fill_value)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3887(__bool__)\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2614()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:209(is_large_binary)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3532(_persistent_sortkey_fn)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2395(from_scalar_attribute)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1707(_get_current_adapter)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3551(_identity_key_props)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/typing_extensions.py:182(_collect_type_vars)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:402(object_state)\n 1 0.000 0.000 0.000 0.000 /home/theia/company-matching/cmf/data/utils/db.py:165(sqa_profiled)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/multiprocessing/process.py:189(name)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:270(mgr_locs)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:112(__init__)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7146(visit_BOOLEAN)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_compile.py:265(_compile_charset)\n 7 0.000 0.000 0.000 0.000 {method 'find' of 'str' objects}\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:250(match)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1550(_from_objects)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1547()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:7652(ensure_has_len)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:496(popitem)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:604(compare_values)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/multiprocessing/process.py:37(current_process)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1697()\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:688(do_begin)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3605(result_processor)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:756(_shallow_copy)\n 29 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:885(mapper)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:673(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:3150(driver)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3760(_assert_can_do_setop)\n 9 0.000 0.000 0.000 0.000 {method 'insert' of 'list' objects}\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:47(is_null)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5238(type)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/methods/to_dict.py:160()\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:980(_is_transaction_boundary)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_weakrefset.py:21(__enter__)\n 16 0.000 0.000 0.000 0.000 {method 'release' of '_thread.RLock' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3733(adapt_emulated_to_native)\n 1 0.000 0.000 0.000 0.000 {method 'sort' of 'numpy.ndarray' objects}\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:880(per_property_preprocessors)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3214()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:375(entity_namespace)\n 9 0.000 0.000 0.000 0.000 {built-in method numpy.asanyarray}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/base.py:74(__len__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:352(__init__)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:119(is_floating)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1699()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:104(__init_subclass__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/api.py:106(_get_distinct_objs)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:712()\n 7 0.000 0.000 0.000 0.000 {built-in method time.time}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:557(__new__)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:204(is_binary)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:649(_get_deprecated_option)\n 1 0.000 0.000 0.000 0.000 :112(release)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3336()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:674(_constructor_expanddim)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3969(_has_row_limiting_clause)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:272()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/base.py:591(shape)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:670(__new__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1445(is_valid)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/crud.py:468(_key_getters_for_crud_column)\n 1 0.000 0.000 0.000 0.000 :593(_validate_timestamp_pyc)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:85(opengroup)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/crud.py:1513(_get_returning_modifiers)\n 4 0.000 0.000 0.000 0.000 {method 'intersection' of 'frozenset' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2700(_post_inspect)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:189(is_time)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:913(__init__)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:982(type)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2757()\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:6672(_maybe_cast_indexer)\n 2 0.000 0.000 0.000 0.000 {method 'transpose' of 'numpy.ndarray' objects}\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:704()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_compile.py:619(isstring)\n 4 0.000 0.000 0.000 0.000 {method 'astype' of 'numpy.ndarray' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:360(_mapper_for_dep)\n 7 0.000 0.000 0.000 0.000 :231(_verbose_message)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:288()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:438(_no_limit_offset)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:885(bind_processor)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:257()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:426(_no_statement_condition)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2339(__bool__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4766()\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:246(is_date)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:266(is_decimal)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:990(addgroup)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:323(_deannotate)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/abc.py:121(__subclasscheck__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1760(np_can_hold_element)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:512(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:928(fix_flags)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:227(_releaseLock)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:4025(skip)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/missing.py:1073(clean_reindex_fill_method)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:570(get_impl)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:426(_inspect_mapped_object)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3377(iterate_to_root)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3553()\n 7 0.000 0.000 0.000 0.000 {method 'write' of '_io.StringIO' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_weakrefset.py:17(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:76(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:213(__new__)\n 8 0.000 0.000 0.000 0.000 {method 'setdefault' of 'dict' objects}\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:1176(_maybe_disallow_fill)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:82(groups)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3358(_register_altered)\n 6 0.000 0.000 0.000 0.000 {built-in method sys.getrefcount}\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6274()\n 1 0.000 0.000 0.000 0.000 :811(find_spec)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_elements_constructors.py:1275(null)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:861(_references)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:182(__init__)\n 7 0.000 0.000 0.000 0.000 {method 'get' of 'ContextVar' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:758(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:215(to_pyarrow_type)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:330()\n 4 0.000 0.000 0.000 0.000 :874(__enter__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1675(getEffectiveLevel)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:344()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:503(__init__)\n 2 0.000 0.000 0.000 0.000 {built-in method _sre.compile}\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:511(f)\n 1 0.000 0.000 0.000 0.000 :185(cb)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/_distutils_hack/__init__.py:89(find_spec)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:225()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1718(unique)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:894(entity)\n 14 0.000 0.000 0.000 0.000 {method 'rstrip' of 'str' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_compile.py:485(_get_literal_prefix)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:660(_constructor)\n 2 0.000 0.000 0.000 0.000 /home/theia/company-matching/cmf/data/utils/db.py:197(data_to_batch)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4508(_non_anon_label)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2211(__init__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:231(memo)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:185(__iter__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1152(_post_coercion)\n 7 0.000 0.000 0.000 0.000 {method '__enter__' of '_thread.lock' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4263(_contains_state)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4864(_render_label_in_columns_clause)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:1950(can_use_returning)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:857()\n 4 0.000 0.000 0.000 0.000 :129()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:902(result_processor)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:382(__exit__)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:703(_resolve_for_literal)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:692(_constructor)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1280(_post_coercion)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2614(_single_table_criterion)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/base.py:613(ndim)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/typing_extensions.py:175(_should_collect_from_parameters)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:356(_escape)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/missing.py:673(na_value_for_dtype)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1041(in_transaction)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3534()\n 4 0.000 0.000 0.000 0.000 {method 'ravel' of 'numpy.ndarray' objects}\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:588(_hide_froms)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/exc.py:48(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:231(_propagate_attrs)\n 4 0.000 0.000 0.000 0.000 :878(__exit__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/typing_extensions.py:148(_check_generic)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:185()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1076(options)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2713(_with_polymorphic_selectable)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:213()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1108(_fire_loader_callables)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1316(memo)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:463(tables_from_leftmost)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:449(get_from_identity)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1616(_expression_label)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/array.py:324(_against_native_enum)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:1424(setup_query)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/IPython/core/displayhook.py:70(check_for_underscore)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1747(__enter__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2758()\n 1 0.000 0.000 0.000 0.000 :351(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:743()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2204(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:291(arrays)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1036(unique)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/_typing.py:132(is_composite_class)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:635(__init__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:379(__enter__)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5287(apply_map)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6267()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1324(_post_coercion)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:262(_fast_discard)\n 1 0.000 0.000 0.000 0.000 :35(_new_module)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:878(_state_dict)\n 6 0.000 0.000 0.000 0.000 {built-in method _imp.acquire_lock}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2956(_non_hashable_value)\n 1 0.000 0.000 0.000 0.000 :1(_generated_cache_key_traversal)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:370(remove)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_compile.py:516(_get_charset_prefix)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:449(has_identity)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:324(_track_last_known_value)\n 1 0.000 0.000 0.000 0.000 :523(_check_name_wrapper)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/compat/numpy/function.py:64(__call__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/uuid.py:239(__eq__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:566(is_executemany)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3345()\n 9 0.000 0.000 0.000 0.000 {built-in method builtins.ord}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:372(_entity_namespace)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:122(__len__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2342(empty)\n 2 0.000 0.000 0.000 0.000 {method 'item' of 'numpy.ndarray' objects}\n 1 0.000 0.000 0.000 0.000 {built-in method _abc._abc_subclasscheck}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:153(__contains__)\n 1 0.000 0.000 0.000 0.000 :1346(_path_importer_cache)\n 2 0.000 0.000 0.000 0.000 {method 'find' of 'bytearray' objects}\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:246(items)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1663(_attributes)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:383(__len__)\n 4 0.000 0.000 0.000 0.000 {built-in method numpy.core._multiarray_umath.normalize_axis_index}\n 1 0.000 0.000 0.000 0.000 :1(_generated_copy_internals_traversal)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:859()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:1913(_filter_by_zero)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4810(_dirty_states)\n 4 0.000 0.000 0.000 0.000 {built-in method _operator.index}\n 2 0.000 0.000 0.000 0.000 {method 'popitem' of 'dict' objects}\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:421(_supports_2d)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1276(disable)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1155()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:1050(presort_saves)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:159(replace)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:1471(_clear_item_cache)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:587()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_parse.py:169(__setitem__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:1617(overload)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/fromnumeric.py:1764(_ravel_dispatcher)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:1442(create_row_processor)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2938(_append_inplace)\n 1 0.000 0.000 0.000 0.000 {method 'tolist' of 'numpy.ndarray' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:2999(description)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:872()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1572(_global_attributes)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:179(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:700()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:743()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:148(contains_state)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:444(mapper)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:4306(_validate_can_reindex)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:386(__iter__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:134(__getitem__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:250(_all_key_set)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/evaluator.py:61(__init__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/multiarray.py:153(concatenate)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_compile.py:477(_get_iscased)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:1979(nlevels)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:174(not_none)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2279()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1358(asint)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/lib/function_base.py:5559(_append_dispatcher)\n 1 0.000 0.000 0.000 0.000 :1(_generated_get_children_traversal)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:196(mgr_to_mgr)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2280()\n 6 0.000 0.000 0.000 0.000 {built-in method _imp.release_lock}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4869()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:178()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1767()\n 3 0.000 0.000 0.000 0.000 {method 'bit_length' of 'int' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:226()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/numeric.py:1455()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/sre_compile.py:81(_combine_flags)\n 1 0.000 0.000 0.000 0.000 :152(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:872(_state_dict_inst)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:128(_type_convert)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:736()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:437()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:925(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1219(_assert_no_memoizations)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1718(returned_defaults_rows)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3568(native)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2960(_null_column_type)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4732(referred_table)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/records.py:576(_deprecate_shape_0_as_None)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2629(_has_aliased_polymorphic_fromclause)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:303()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/lib/function_base.py:5365(_insert_dispatcher)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:661(_copy_callables)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:4515(_check_inplace_and_allows_duplicate_labels)\n 1 0.000 0.000 0.000 0.000 :1006(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:994(hard_close)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:243(result_processor)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2381()\n 2 0.000 0.000 0.000 0.000 {method 'index' of 'tuple' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/numeric.py:1389(_moveaxis_dispatcher)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3143(entity_namespace)\n 1 0.000 0.000 0.000 0.000 {built-in method _imp.is_frozen}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:109()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:480()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:672(__init__)\n 2 0.000 0.000 0.000 0.000 {method 'remove' of 'set' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:757(_generate)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5018(_generate_for_statement)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/multiarray.py:892(bincount)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2385()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2606(_instance)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:7750(_maybe_try_sort)\n 2 0.000 0.000 0.000 0.000 {method 'encode' of 'str' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:710(_set_get_options)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:502(_setup_orm_returning)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/ddl.py:1250()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:876()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4653()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3211(_validate_sort_keyword)\n 1 0.000 0.000 0.000 0.000 :1()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/reshape/concat.py:693(_get_result_dim)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:446()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:166(_instance_dict)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:395(visit_clauseelement)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2389()\n 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:349(description)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:326(_collect_insert_commands)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3546(key)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:209(has_work)\n 1 0.000 0.000 0.000 0.000 {method 'issuperset' of 'set' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1365(_label)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:690(_collect_delete_commands)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:284()\n 1 0.000 0.000 0.000 0.000 :68(_relax_case)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/fromnumeric.py:861(_sort_dispatcher)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:240(bind_processor)\n 1 0.000 0.000 0.000 0.000 {built-in method builtins.globals}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:109(_dirty_states)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6513(self_group)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:105()\n 1 0.000 0.000 0.000 0.000 :406(has_location)\n 1 0.000 0.000 0.000 0.000 :736(find_spec)\n 1 0.000 0.000 0.000 0.000 :1031(get_filename)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:436(_pending_mutations)\n 1 0.000 0.000 0.000 0.000 :841(create_module)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:234(__enter__)\n 1 0.000 0.000 0.000 0.000 {built-in method _imp._fix_co_filename}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1106()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3786(self_group)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:631(self_group)\n 1 0.000 0.000 0.000 0.000 {method '__init_subclass__' of 'object' objects}\n\n\n\n" - } - ], - "source": [ - "%time\n", - "\n", - "ew_deduped._batch_size = 500_000\n", - "\n", - "with sqa_profiled():\n", - " ew_deduped.to_cmf()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "b7639271-0294-4cce-9f69-ad12acbb8765", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
left_idright_idprobability
0b'\\\\s31\\x86\\xbb\\xd0s\\xa2\\x92\\x8a\\xadI< \\xc7^+l...b'v\\xa9=\\x14\\xc2\\xc2~\\xa7\\xbe\\xb9\\xa2\\xe6\\xe2M...1
1b'?@\\xf4\\xa9\\xbeBQ\\xa8\\x7fn\\xcbT\\xac\\xedL\\x05\\...b'\\xf3\\xce\\xa4\\xe4H\\r\\xcf\\xaf\\x11IfH\\xf9\\xc4\\x...1
2b'\\xfe^[\\xea\\xecLt\\x08O\\x0b\\x11.\\xdf*\\xcb\\x89K...b'-I\\xf4:\\xb6\\xeb\\xb4\\xd9\\xbb\\xe0\\xc4\\xb7V4\\xc...1
\n", - "
" - ], - "text/plain": [ - " left_id \\\n", - "0 b'\\\\s31\\x86\\xbb\\xd0s\\xa2\\x92\\x8a\\xadI< \\xc7^+l... \n", - "1 b'?@\\xf4\\xa9\\xbeBQ\\xa8\\x7fn\\xcbT\\xac\\xedL\\x05\\... \n", - "2 b'\\xfe^[\\xea\\xecLt\\x08O\\x0b\\x11.\\xdf*\\xcb\\x89K... \n", - "\n", - " right_id probability \n", - "0 b'v\\xa9=\\x14\\xc2\\xc2~\\xa7\\xbe\\xb9\\xa2\\xe6\\xe2M... 1 \n", - "1 b'\\xf3\\xce\\xa4\\xe4H\\r\\xcf\\xaf\\x11IfH\\xf9\\xc4\\x... 1 \n", - "2 b'-I\\xf4:\\xb6\\xeb\\xb4\\xd9\\xbb\\xe0\\xc4\\xb7V4\\xc... 1 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 90276899 entries, 0 to 90276898\n", - "Data columns (total 3 columns):\n", - " # Column Dtype \n", - "--- ------ ----- \n", - " 0 left_id binary[pyarrow]\n", - " 1 right_id binary[pyarrow]\n", - " 2 probability int32[pyarrow] \n", - "dtypes: binary[pyarrow](2), int32[pyarrow](1)\n", - "memory usage: 4.4 GB\n" - ] - } - ], - "source": [ - "with s3.read(path=\"hmrc_exporters_probabilities.parquet\") as f:\n", - " exp_deduped = pd.read_parquet(f, dtype_backend=\"pyarrow\")\n", - "\n", - "exp_deduped.left_id = exp_deduped.left_id.astype(\"binary[pyarrow]\")\n", - "exp_deduped.right_id = exp_deduped.right_id.astype(\"binary[pyarrow]\")\n", - "\n", - "exp_deduped.head(3)\n", - "exp_deduped.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "939e89e8-a2e8-4b29-a52a-96964885d9b3", - "metadata": {}, - "outputs": [], - "source": [ - "all_edges = (\n", - " exp_deduped\n", - " .query(\"probability >= 1\")\n", - " .filter([\"left_id\", \"right_id\"])\n", - " .itertuples(index=False, name=None)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "c1c9992e-b544-4693-b7c0-bab103efc322", - "metadata": {}, - "outputs": [], - "source": [ - "G = rx.PyGraph()\n", - "added = {}\n", - "\n", - "for edge in all_edges:\n", - " edge_idx = []\n", - " for sha1 in edge:\n", - " sha1_idx = added.get(sha1)\n", - " if sha1_idx is None:\n", - " sha1_idx = G.add_node(sha1)\n", - " added[sha1] = sha1_idx\n", - " edge_idx.append(sha1_idx)\n", - " edge_idx.append(None)\n", - " _ = G.add_edge(*edge_idx)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "dfadc38c-a5a9-4451-8bb8-d13a1121b82d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "187004" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rx.number_connected_components(G)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "5d92dd7a-d25d-4daa-bcfd-8442ca322486", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "b'\\\\s31\\x86\\xbb\\xd0s\\xa2\\x92\\x8a\\xadI< \\xc7^+l\\xdf'\n" - ] - } - ], - "source": [ - "for edge in all_edges:\n", - " print(edge)\n", - " break" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "85cd00ab-ce8e-4afe-a667-d8e70aa43fbc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(0, b'\\\\s31\\x86\\xbb\\xd0s\\xa2\\x92\\x8a\\xadI< \\xc7^+l\\xdf', b'v\\xa9=\\x14\\xc2\\xc2~\\xa7\\xbe\\xb9\\xa2\\xe6\\xe2M\\xca\\x9d\\xf6(\\x0b1')" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(\n", - " exp_deduped\n", - " .head(100_000)\n", - " .query(\"probability >= 1\")\n", - " .filter([\"left_id\", \"right_id\"])\n", - " .to_records()\n", - ")[0]" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.16 64-bit ('company_matching': conda)", - "language": "python", - "name": "python_defaultSpec_1710418206128" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16-final" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file diff --git a/notebooks/engineering/WL_CHxExp.ipynb b/notebooks/engineering/WL_CHxExp.ipynb deleted file mode 100644 index 7cf01dc..0000000 --- a/notebooks/engineering/WL_CHxExp.ipynb +++ /dev/null @@ -1,693 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Companies House x HMRC exporters\n", - "\n", - "I want to build this in a way that one can improve a link pair in a notebook, then deploy those changes to the link easily. I'm going to play with this idea here." - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "from cmf.data import utils as du\n", - "from cmf.models import utils as mu\n", - "from cmf.config import tables, stopwords\n", - "from cmf.features.clean_complex import clean_comp_names\n", - "from cmf.link.make_link import LinkDatasets\n", - "\n", - "from splink.duckdb.linker import DuckDBLinker\n", - "import splink.duckdb.comparison_library as cl\n", - "import splink.duckdb.comparison_template_library as ctl\n", - "\n", - "# import os\n", - "import logging\n", - "import mlflow\n", - "from functools import partial\n", - "from dotenv import load_dotenv, find_dotenv\n", - "import json" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "settings = {\n", - " \"link_type\": \"link_only\",\n", - " \"unique_id_column_name\": \"id\",\n", - " \"retain_matching_columns\": False,\n", - " \"retain_intermediate_calculation_columns\": False,\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " \"\"\"\n", - " (l.name_unusual_tokens = r.name_unusual_tokens)\n", - " and (\n", - " l.name_unusual_tokens <> ''\n", - " and r.name_unusual_tokens <> ''\n", - " )\n", - " \"\"\",\n", - " \"\"\"\n", - " (l.postcode = r.postcode)\n", - " and (\n", - " l.postcode <> ''\n", - " and r.postcode <> ''\n", - " )\n", - " \"\"\"\n", - " ],\n", - " \"comparisons\": [\n", - " cl.jaro_winkler_at_thresholds(\n", - " \"name_unusual_tokens\", [0.9, 0.6], term_frequency_adjustments=True\n", - " ),\n", - " ctl.postcode_comparison(\"postcode\")\n", - " ]\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline = {\n", - " \"estimate_probability_two_random_records_match\": {\n", - " \"function\": \"estimate_probability_two_random_records_match\",\n", - " \"arguments\": {\n", - " \"deterministic_matching_rules\": \"\"\"\n", - " l.name_unusual_tokens = r.name_unusual_tokens\n", - " \"\"\",\n", - " \"recall\": 0.7 \n", - " }\n", - " },\n", - " \"estimate_u_using_random_sampling\": {\n", - " \"function\": \"estimate_u_using_random_sampling\",\n", - " \"arguments\": {\n", - " \"max_pairs\": 1e6\n", - " }\n", - " },\n", - " \"estimate_parameters_using_expectation_maximisation\": {\n", - " \"function\": \"estimate_parameters_using_expectation_maximisation\",\n", - " \"arguments\": {\n", - " \"blocking_rule\": \"\"\"\n", - " l.name_unusual_tokens = r.name_unusual_tokens\n", - " \"\"\"\n", - " }\n", - " }\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "ch_settings = {\n", - " \"name\": '\"companieshouse\".\"companies\"',\n", - " \"select\": [\n", - " \"id::text\",\n", - " \"company_name\",\n", - " \"postcode\"\n", - " ],\n", - " \"preproc\": {\n", - " \"clean_comp_names\": {\n", - " \"function\": clean_comp_names,\n", - " \"arguments\": {\n", - " \"primary_col\": \"company_name\",\n", - " \"secondary_col\": None,\n", - " \"stopwords\": stopwords\n", - " }\n", - " }\n", - " }\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "exp_settings = {\n", - " \"name\": '\"hmrc\".\"trade__exporters\"',\n", - " \"select\": [\n", - " \"id::text\",\n", - " \"company_name\",\n", - " \"postcode\"\n", - " ],\n", - " \"preproc\": {\n", - " \"clean_comp_names\": {\n", - " \"function\": clean_comp_names,\n", - " \"arguments\": {\n", - " \"primary_col\": \"company_name\",\n", - " \"secondary_col\": None,\n", - " \"stopwords\": stopwords\n", - " }\n", - " }\n", - " }\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Running this as an MLflow experiment" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [], - "source": [ - "ch_x_exp = LinkDatasets(\n", - " table_l = ch_settings,\n", - " table_r = exp_settings,\n", - " settings = settings,\n", - " pipeline = pipeline\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Probability two random records match is estimated to be 2.33e-07.\n", - "This means that amongst all possible pairwise record comparisons, one in 4,294,837.17 are expected to match. With 1,368,138,787,675 total possible comparisons, we expect a total of around 318,554.29 matching pairs\n", - "----- Estimating u probabilities using random sampling -----\n", - "u probability not trained for name_unusual_tokens - Exact match (comparison vector value: 3). This usually means the comparison level was never observed in the training data.\n", - "\n", - "Estimated u probabilities using random sampling\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - name_unusual_tokens (some u values are not trained, no m values are trained).\n", - " - postcode (no m values are trained).\n", - "\n", - "----- Starting EM training session -----\n", - "\n", - "Estimating the m probabilities of the model by blocking on:\n", - "\n", - " l.name_unusual_tokens = r.name_unusual_tokens\n", - " \n", - "\n", - "Parameter estimates will be made for the following comparison(s):\n", - " - postcode\n", - "\n", - "Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: \n", - " - name_unusual_tokens\n", - "\n", - "Iteration 1: Largest change in params was 0.356 in probability_two_random_records_match\n", - "Iteration 2: Largest change in params was 0.0999 in probability_two_random_records_match\n", - "Iteration 3: Largest change in params was -0.0588 in the m_probability of postcode, level `Exact match postcode`\n", - "Iteration 4: Largest change in params was 0.118 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "Iteration 5: Largest change in params was 0.0585 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "Iteration 6: Largest change in params was 0.00415 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "Iteration 7: Largest change in params was 0.000207 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "Iteration 8: Largest change in params was 1.02e-05 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "\n", - "EM converged after 8 iterations\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - name_unusual_tokens (some u values are not trained, no m values are trained).\n", - "\n", - " -- WARNING --\n", - "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. To produce predictions the following untrained trained parameters will use default values.\n", - "Comparison: 'name_unusual_tokens':\n", - " m values not fully trained\n", - "Comparison: 'name_unusual_tokens':\n", - " u values not fully trained\n" - ] - } - ], - "source": [ - "ch_x_exp.run_mlflow_experiment(\n", - " run_name=\"Basic linkage\",\n", - " description=\"\"\"\n", - " - Unusual tokens in name\n", - " - Preset postcode distances\n", - " - Eval vs existing service\n", - " \"\"\",\n", - " threshold_match_probability=0.7\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Playing with the pipeline bit by bit" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "ch_x_exp = LinkDatasets(\n", - " table_l = ch_settings,\n", - " table_r = exp_settings,\n", - " settings = settings,\n", - " pipeline = pipeline\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "ch_x_exp.get_data()" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "ch_x_exp.preprocess_data()" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "ch_x_exp.create_linker()" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Probability two random records match is estimated to be 2.33e-07.\n", - "This means that amongst all possible pairwise record comparisons, one in 4,294,837.17 are expected to match. With 1,368,138,787,675 total possible comparisons, we expect a total of around 318,554.29 matching pairs\n", - "----- Estimating u probabilities using random sampling -----\n", - "u probability not trained for name_unusual_tokens - Exact match (comparison vector value: 3). This usually means the comparison level was never observed in the training data.\n", - "\n", - "Estimated u probabilities using random sampling\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - name_unusual_tokens (some u values are not trained, no m values are trained).\n", - " - postcode (no m values are trained).\n", - "\n", - "----- Starting EM training session -----\n", - "\n", - "Estimating the m probabilities of the model by blocking on:\n", - "\n", - " l.name_unusual_tokens = r.name_unusual_tokens\n", - " \n", - "\n", - "Parameter estimates will be made for the following comparison(s):\n", - " - postcode\n", - "\n", - "Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: \n", - " - name_unusual_tokens\n", - "\n", - "Iteration 1: Largest change in params was 0.374 in probability_two_random_records_match\n", - "Iteration 2: Largest change in params was -0.0967 in the m_probability of postcode, level `Exact match postcode`\n", - "Iteration 3: Largest change in params was -0.0538 in the m_probability of postcode, level `Exact match postcode`\n", - "Iteration 4: Largest change in params was 0.111 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "Iteration 5: Largest change in params was 0.0665 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "Iteration 6: Largest change in params was 0.00538 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "Iteration 7: Largest change in params was 0.000287 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "Iteration 8: Largest change in params was 1.5e-05 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "\n", - "EM converged after 8 iterations\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - name_unusual_tokens (some u values are not trained, no m values are trained).\n" - ] - } - ], - "source": [ - "ch_x_exp.train_linker()" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - " -- WARNING --\n", - "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. To produce predictions the following untrained trained parameters will use default values.\n", - "Comparison: 'name_unusual_tokens':\n", - " m values not fully trained\n", - "Comparison: 'name_unusual_tokens':\n", - " u values not fully trained\n" - ] - } - ], - "source": [ - "ch_x_exp.predict(threshold_match_probability=0.7) " - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'eval_matches': 175842,\n", - " 'pred_matches': 152491,\n", - " 'both_eval_and_pred': 104636,\n", - " 'eval_only': 71206,\n", - " 'pred_only': 47855,\n", - " 'both_eval_and_pred_sample': [{'id_l': '10286497',\n", - " 'id_r': '1038391',\n", - " 'match_probability': 0.9998875592772737,\n", - " 'score': 4,\n", - " 'company_name_l_pred': 'KIKKA MARCO LTD',\n", - " 'postcode_l_pred': 'EN6 5AS',\n", - " 'company_name_r_pred': 'KIKKA MARCO LTD',\n", - " 'postcode_r_pred': 'EN6 5AS',\n", - " 'company_name_l_exist': 'KIKKA MARCO LTD',\n", - " 'postcode_l_exist': 'EN6 5AS',\n", - " 'company_name_r_exist': 'KIKKA MARCO LTD',\n", - " 'postcode_r_exist': 'EN6 5AS'},\n", - " {'id_l': '10615340',\n", - " 'id_r': '2498206',\n", - " 'match_probability': 0.9959721759628796,\n", - " 'score': 4,\n", - " 'company_name_l_pred': 'A SPEC ENVIRONMENTAL LTD',\n", - " 'postcode_l_pred': 'GU15 3AJ',\n", - " 'company_name_r_pred': 'A SPEC ENVIRONMENTAL LTD',\n", - " 'postcode_r_pred': 'GU15 3AQ',\n", - " 'company_name_l_exist': 'A SPEC ENVIRONMENTAL LTD',\n", - " 'postcode_l_exist': 'GU15 3AJ',\n", - " 'company_name_r_exist': 'A SPEC ENVIRONMENTAL LTD',\n", - " 'postcode_r_exist': 'GU15 3AQ'},\n", - " {'id_l': '12037831',\n", - " 'id_r': '2940538',\n", - " 'match_probability': 0.9999437764777301,\n", - " 'score': 4,\n", - " 'company_name_l_pred': 'NUSABIOTICS LIMITED',\n", - " 'postcode_l_pred': 'LU4 9LN',\n", - " 'company_name_r_pred': 'NUSABIOTICS LTD',\n", - " 'postcode_r_pred': 'LU4 9LN',\n", - " 'company_name_l_exist': 'NUSABIOTICS LIMITED',\n", - " 'postcode_l_exist': 'LU4 9LN',\n", - " 'company_name_r_exist': 'NUSABIOTICS LTD',\n", - " 'postcode_r_exist': 'LU4 9LN'},\n", - " {'id_l': '08580992',\n", - " 'id_r': '1069458',\n", - " 'match_probability': 0.9849120168133093,\n", - " 'score': 4,\n", - " 'company_name_l_pred': 'CONTRACT PUBLISHING UK (CPUK) LTD',\n", - " 'postcode_l_pred': 'PE19 7BA',\n", - " 'company_name_r_pred': 'CONTRACT PUBLISHING UK (CPUK) LTD',\n", - " 'postcode_r_pred': 'PE19 5DA',\n", - " 'company_name_l_exist': 'CONTRACT PUBLISHING UK (CPUK) LTD',\n", - " 'postcode_l_exist': 'PE19 7BA',\n", - " 'company_name_r_exist': 'CONTRACT PUBLISHING UK (CPUK) LTD',\n", - " 'postcode_r_exist': 'PE19 5DA'},\n", - " {'id_l': '01725851',\n", - " 'id_r': '1951622',\n", - " 'match_probability': 0.9999437764777301,\n", - " 'score': 5,\n", - " 'company_name_l_pred': 'SEALOCK LIMITED',\n", - " 'postcode_l_pred': 'SP10 5NU',\n", - " 'company_name_r_pred': 'SEALOCK LTD',\n", - " 'postcode_r_pred': 'SP10 5NU',\n", - " 'company_name_l_exist': 'SEALOCK LIMITED',\n", - " 'postcode_l_exist': 'SP10 5NU',\n", - " 'company_name_r_exist': 'SEALOCK LTD',\n", - " 'postcode_r_exist': 'SP10 5NU'},\n", - " {'id_l': '05822057',\n", - " 'id_r': '201150',\n", - " 'match_probability': 0.9999437764777301,\n", - " 'score': 5,\n", - " 'company_name_l_pred': 'TAYWELL ICE CREAMS LIMITED',\n", - " 'postcode_l_pred': 'TN12 6PY',\n", - " 'company_name_r_pred': 'TAYWELL ICE CREAMS LTD',\n", - " 'postcode_r_pred': 'TN12 6PY',\n", - " 'company_name_l_exist': 'TAYWELL ICE CREAMS LIMITED',\n", - " 'postcode_l_exist': 'TN12 6PY',\n", - " 'company_name_r_exist': 'TAYWELL ICE CREAMS LTD',\n", - " 'postcode_r_exist': 'TN12 6PY'},\n", - " {'id_l': '07138758',\n", - " 'id_r': '1516277',\n", - " 'match_probability': 0.9999437764777301,\n", - " 'score': 4,\n", - " 'company_name_l_pred': 'BIO FARMA LTD',\n", - " 'postcode_l_pred': 'BL3 5JD',\n", - " 'company_name_r_pred': 'BIO FARMA LTD',\n", - " 'postcode_r_pred': 'BL3 5JD',\n", - " 'company_name_l_exist': 'BIO FARMA LTD',\n", - " 'postcode_l_exist': 'BL3 5JD',\n", - " 'company_name_r_exist': 'BIO FARMA LTD',\n", - " 'postcode_r_exist': 'BL3 5JD'},\n", - " {'id_l': 'SC098014',\n", - " 'id_r': '1903055',\n", - " 'match_probability': 0.9999437764777301,\n", - " 'score': 4,\n", - " 'company_name_l_pred': 'ORION ENGINEERING SERVICES LIMITED',\n", - " 'postcode_l_pred': 'IV2 6AA',\n", - " 'company_name_r_pred': 'ORION ENGINEERING SERVICES LIMITED',\n", - " 'postcode_r_pred': 'IV2 6AA',\n", - " 'company_name_l_exist': 'ORION ENGINEERING SERVICES LIMITED',\n", - " 'postcode_l_exist': 'IV2 6AA',\n", - " 'company_name_r_exist': 'ORION ENGINEERING SERVICES LIMITED',\n", - " 'postcode_r_exist': 'IV2 6AA'},\n", - " {'id_l': '11408493',\n", - " 'id_r': '1879615',\n", - " 'match_probability': 0.9999437764777301,\n", - " 'score': 4,\n", - " 'company_name_l_pred': 'LUCY WITH DIAMONDS LTD',\n", - " 'postcode_l_pred': 'PO19 1DP',\n", - " 'company_name_r_pred': 'LUCY WITH DIAMONDS LTD',\n", - " 'postcode_r_pred': 'PO19 1DP',\n", - " 'company_name_l_exist': 'LUCY WITH DIAMONDS LTD',\n", - " 'postcode_l_exist': 'PO19 1DP',\n", - " 'company_name_r_exist': 'LUCY WITH DIAMONDS LTD',\n", - " 'postcode_r_exist': 'PO19 1DP'},\n", - " {'id_l': '05404187',\n", - " 'id_r': '1524479',\n", - " 'match_probability': 0.9999437764777301,\n", - " 'score': 5,\n", - " 'company_name_l_pred': 'FLEXIBLE STORAGE SOLUTIONS LTD',\n", - " 'postcode_l_pred': 'RM20 3EF',\n", - " 'company_name_r_pred': 'FLEXIBLE STORAGE SOLUTIONS LIMITED',\n", - " 'postcode_r_pred': 'RM20 3EF',\n", - " 'company_name_l_exist': 'FLEXIBLE STORAGE SOLUTIONS LTD',\n", - " 'postcode_l_exist': 'RM20 3EF',\n", - " 'company_name_r_exist': 'FLEXIBLE STORAGE SOLUTIONS LIMITED',\n", - " 'postcode_r_exist': 'RM20 3EF'}],\n", - " 'eval_only_sample': [{'id_l': '14476295',\n", - " 'id_r': '2847750',\n", - " 'score': 4.0,\n", - " 'company_name_l_exist': 'EXEDGE LTD',\n", - " 'postcode_l_exist': 'OX3 9TP',\n", - " 'company_name_r_exist': 'JESSICA HALIDA HARJONO',\n", - " 'postcode_r_exist': 'OX3 9TP'},\n", - " {'id_l': '02389148',\n", - " 'id_r': '94279',\n", - " 'score': 4.0,\n", - " 'company_name_l_exist': 'FRIULSIDER UK LIMITED',\n", - " 'postcode_l_exist': 'B78 3HG',\n", - " 'company_name_r_exist': 'SIMPSONS STRONG-TIE INTERNATIONAL INC (USA)',\n", - " 'postcode_r_exist': 'B78 3HG'},\n", - " {'id_l': '08969713',\n", - " 'id_r': '2878148',\n", - " 'score': 4.0,\n", - " 'company_name_l_exist': 'PRODIGI (UK) LTD',\n", - " 'postcode_l_exist': 'CF10 1AF',\n", - " 'company_name_r_exist': 'PRODIGI (UK) LIMITED',\n", - " 'postcode_r_exist': 'GU10 2DZ'},\n", - " {'id_l': '07973711',\n", - " 'id_r': '2870064',\n", - " 'score': 4.0,\n", - " 'company_name_l_exist': 'JK SUPPLY LIMITED',\n", - " 'postcode_l_exist': 'E7 9PA',\n", - " 'company_name_r_exist': 'JK SUPPLY LTD',\n", - " 'postcode_r_exist': 'PE7 8FZ'},\n", - " {'id_l': '08910840',\n", - " 'id_r': '1160706',\n", - " 'score': 4.0,\n", - " 'company_name_l_exist': 'CATALYST ADVISORS EUROPE LIMITED',\n", - " 'postcode_l_exist': 'WC2B 5AH',\n", - " 'company_name_r_exist': 'CATALYST ADVISORS EUROPE LIMITED',\n", - " 'postcode_r_exist': 'W1J 6HE'},\n", - " {'id_l': '01610943',\n", - " 'id_r': '248821',\n", - " 'score': 5.0,\n", - " 'company_name_l_exist': 'AVIAGEN LIMITED',\n", - " 'postcode_l_exist': 'CV37 8BH',\n", - " 'company_name_r_exist': 'AVIAGEN LIMITED',\n", - " 'postcode_r_exist': 'EH28 8SZ'},\n", - " {'id_l': '06491238',\n", - " 'id_r': '2377559',\n", - " 'score': 4.0,\n", - " 'company_name_l_exist': 'DESIGNS IN AIR LTD',\n", - " 'postcode_l_exist': 'BS5 6JF',\n", - " 'company_name_r_exist': 'PATRICK JOHN HAMMETT',\n", - " 'postcode_r_exist': 'BS5 6JF'},\n", - " {'id_l': '02970659',\n", - " 'id_r': '3103597',\n", - " 'score': 5.0,\n", - " 'company_name_l_exist': 'KARAS PLATING LIMITED',\n", - " 'postcode_l_exist': 'PR9 0PR',\n", - " 'company_name_r_exist': 'KARAS PLATING LIMITED',\n", - " 'postcode_r_exist': 'WN7 3EH'},\n", - " {'id_l': '08560882',\n", - " 'id_r': '2665269',\n", - " 'score': 3.0,\n", - " 'company_name_l_exist': 'TOCC 2013 LTD',\n", - " 'postcode_l_exist': 'EC2A 4NE',\n", - " 'company_name_r_exist': 'TOCC 2013 LTD',\n", - " 'postcode_r_exist': 'OX14 4SH'},\n", - " {'id_l': '02990100',\n", - " 'id_r': '1162863',\n", - " 'score': 4.0,\n", - " 'company_name_l_exist': 'PETARDS GROUP PLC',\n", - " 'postcode_l_exist': 'GU1 2AB',\n", - " 'company_name_r_exist': 'PETARDS GROUP PLC',\n", - " 'postcode_r_exist': 'NE11 0TU'}],\n", - " 'pred_only_sample': [{'id_l': '11274631',\n", - " 'id_r': '1919310',\n", - " 'match_probability': 0.7006327252377512,\n", - " 'company_name_l_pred': 'JP EXHAUSTS LTD',\n", - " 'postcode_l_pred': 'S9 2DN',\n", - " 'company_name_r_pred': 'EXHAUSTS UK LIMITED',\n", - " 'postcode_r_pred': 'S9 2DN'},\n", - " {'id_l': '08182799',\n", - " 'id_r': '499828',\n", - " 'match_probability': 0.8696886315637885,\n", - " 'company_name_l_pred': 'JJA PACK LTD',\n", - " 'postcode_l_pred': 'S70 2BP',\n", - " 'company_name_r_pred': 'JJA PACK LTD',\n", - " 'postcode_r_pred': 'S72 9LP'},\n", - " {'id_l': 'OC301032',\n", - " 'id_r': '379188',\n", - " 'match_probability': 0.9999156670873419,\n", - " 'company_name_l_pred': 'EYGS LLP',\n", - " 'postcode_l_pred': 'SE1 2DA',\n", - " 'company_name_r_pred': 'EYGS LLP',\n", - " 'postcode_r_pred': 'SE1 2DA'},\n", - " {'id_l': '09841464',\n", - " 'id_r': '459319',\n", - " 'match_probability': 0.8334844645166269,\n", - " 'company_name_l_pred': 'AB COMMERCIALS LTD',\n", - " 'postcode_l_pred': 'FY2 0QX',\n", - " 'company_name_r_pred': 'AB COMMERCIALS LTD',\n", - " 'postcode_r_pred': 'FY1 3HG'},\n", - " {'id_l': '02831994',\n", - " 'id_r': '2237188',\n", - " 'match_probability': 0.7006327252377512,\n", - " 'company_name_l_pred': 'RUSSELL-COOKE TRUST COMPANY',\n", - " 'postcode_l_pred': 'SW15 6AB',\n", - " 'company_name_r_pred': 'RUSSELL-COOKE LLP',\n", - " 'postcode_r_pred': 'SW15 6AB'},\n", - " {'id_l': '09734085',\n", - " 'id_r': '1617216',\n", - " 'match_probability': 0.8334844645166269,\n", - " 'company_name_l_pred': 'GRIP SYSTEMS LIMITED',\n", - " 'postcode_l_pred': 'WS11 0EL',\n", - " 'company_name_r_pred': 'GRIP SYSTEMS LIMITED',\n", - " 'postcode_r_pred': 'WS9 8BH'},\n", - " {'id_l': '09521519',\n", - " 'id_r': '393142',\n", - " 'match_probability': 0.9999156670873419,\n", - " 'company_name_l_pred': 'SPECTRUM ENVIRONMENTAL GROUP LIMITED',\n", - " 'postcode_l_pred': 'WR3 7JW',\n", - " 'company_name_r_pred': 'SPECTRUM ENVIRONMENTAL LIMITED',\n", - " 'postcode_r_pred': 'WR3 7JW'},\n", - " {'id_l': '11707244',\n", - " 'id_r': '3358122',\n", - " 'match_probability': 0.9999156670873419,\n", - " 'company_name_l_pred': 'POLYMOULD LTD',\n", - " 'postcode_l_pred': 'SY16 3AG',\n", - " 'company_name_r_pred': 'POLYMOULD LTD',\n", - " 'postcode_r_pred': 'SY16 3AG'},\n", - " {'id_l': '00510618',\n", - " 'id_r': '705070',\n", - " 'match_probability': 0.7006327252377512,\n", - " 'company_name_l_pred': 'CAMPDEN BRI',\n", - " 'postcode_l_pred': 'GL55 6LD',\n", - " 'company_name_r_pred': 'CAMPDEN BRI (CHIPPING CAMPDEN) LIMITED',\n", - " 'postcode_r_pred': 'GL55 6LD'},\n", - " {'id_l': '12141657',\n", - " 'id_r': '3307978',\n", - " 'match_probability': 0.8334844645166269,\n", - " 'company_name_l_pred': 'TROPGOUK LTD',\n", - " 'postcode_l_pred': 'N3 1DH',\n", - " 'company_name_r_pred': 'TROPGOUK LTD',\n", - " 'postcode_r_pred': 'N18 3BH'}]}" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ch_x_exp.generate_report(sample=10)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/engineering/WL_cleaning_test_2.ipynb b/notebooks/engineering/WL_cleaning_test_2.ipynb deleted file mode 100644 index f5b3d8d..0000000 --- a/notebooks/engineering/WL_cleaning_test_2.ipynb +++ /dev/null @@ -1,992 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "id": "b9608a8f-1bfd-4099-a563-d02f9825d70f", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext dotenv\n", - "%dotenv\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "950dd0d0-5e6e-4b8e-9689-f392df09af57", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "import ast\n", - "from functools import partial\n", - "\n", - "from cmf import clean, process\n", - "from cmf import locations as loc\n", - "from cmf.clean import steps\n", - "from cmf.clean import utils as cu\n", - "\n", - "from sqlalchemy import create_engine\n", - "from sqlalchemy.orm import Session\n", - "\n", - "import pandas as pd\n", - "import duckdb\n", - "\n", - "engine = create_engine(\"postgresql://\", echo=False)\n", - "engine.dispose()" - ] - }, - { - "cell_type": "markdown", - "id": "7b24298e-b798-4e47-9620-1be4cf186c26", - "metadata": {}, - "source": [ - "# Cleaning tests\n", - "\n", - "Just playing with unit tests." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "811c262c-3544-4b0d-bf24-5fa5474a5c36", - "metadata": {}, - "outputs": [], - "source": [ - "def load_test_data(path):\n", - " dirty = pd.read_csv(Path(path, \"dirty.csv\"), converters={\"list\": ast.literal_eval})\n", - " clean = pd.read_csv(Path(path, \"clean.csv\"), converters={\"list\": ast.literal_eval})\n", - " dirty.columns = [\"col\"]\n", - " clean.columns = [\"col\"]\n", - "\n", - " return dirty, clean" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "b183ead4-6a3c-45fb-8829-ae32cb017411", - "metadata": {}, - "outputs": [], - "source": [ - "expand_abbreviations_partial = partial(\n", - " steps.expand_abbreviations, \n", - " replacements={\"co\": \"company\", \"ltd\": \"limited\"}\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0841d9fc-81a9-4e0c-95e0-047566c3f83a", - "metadata": {}, - "outputs": [], - "source": [ - "steps.expand_abbreviations(" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "793ed8c7-0b12-4f52-8e7f-c4ce25911e08", - "metadata": {}, - "outputs": [], - "source": [ - "dirty, cleaned = load_test_data(\n", - " Path(loc.PROJECT_DIR, \"test\", \"cleaning\", \"unnest_renest\", \"expand_abbreviations\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "1c2bf97b-4202-4956-8ed6-d34da8842303", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
col
0[foo, foo co]
1[bar ltd, ltd bar]
2[bar ltd, ltd bar]
3[baz]
4[co qux]
\n", - "
" - ], - "text/plain": [ - " col\n", - "0 [foo, foo co]\n", - "1 [bar ltd, ltd bar]\n", - "2 [bar ltd, ltd bar]\n", - "3 [baz]\n", - "4 [co qux]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dirty" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "1878957e-fc7e-4e01-b0fd-c719449a26f0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
col
0[foo, foo company]
1[bar limited, limited bar]
2[bar limited, limited bar]
3[baz]
4[company qux]
\n", - "
" - ], - "text/plain": [ - " col\n", - "0 [foo, foo company]\n", - "1 [bar limited, limited bar]\n", - "2 [bar limited, limited bar]\n", - "3 [baz]\n", - "4 [company qux]" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cleaned" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "72dd99cc-cc20-4440-9332-a81451ab0779", - "metadata": {}, - "outputs": [], - "source": [ - "test_cleaning_function_arrayed = cu.unnest_renest(\n", - " cu.cleaning_function(\n", - " expand_abbreviations_partial\n", - " )\n", - ")\n", - "\n", - "clean_out = test_cleaning_function_arrayed(dirty, column=\"col\")" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "ef69955e-2a6e-4065-aceb-612c89f8fe4c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clean_out.equals(cleaned)" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "id": "fd1ebcb0-ce2b-4abf-8c1e-e4174baf3a54", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clean_out.sort_values(by=\"col\").reset_index(drop=True).equals(\n", - " cleaned.sort_values(by=\"col\").reset_index(drop=True)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "42266174-84ea-41d7-b5f5-85ccf7c6978d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 False\n", - "1 True\n", - "2 False\n", - "3 False\n", - "4 False\n", - "Name: col, dtype: bool" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clean_out.col.sort_values().eq(cleaned.col.sort_values())" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "8e320a64-4fb6-42a1-a8c4-5c316e99a047", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1 [bar limited, limited bar]\n", - "4 [bar limited, limited bar]\n", - "2 [baz]\n", - "0 [company qux]\n", - "3 [foo, foo company]\n", - "Name: col, dtype: object" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "1 [bar limited, limited bar]\n", - "2 [bar limited, limited bar]\n", - "3 [baz]\n", - "4 [company qux]\n", - "0 [foo, foo company]\n", - "Name: col, dtype: object" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clean_out.col.sort_values()\n", - "cleaned.col.sort_values()" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "3a81a9fd-4384-4d9d-87c9-1b3cdcf9ae71", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
col
0[bar limited, limited bar]
1[bar limited, limited bar]
2[baz]
3[company qux]
4[foo, foo company]
\n", - "
" - ], - "text/plain": [ - " col\n", - "0 [bar limited, limited bar]\n", - "1 [bar limited, limited bar]\n", - "2 [baz]\n", - "3 [company qux]\n", - "4 [foo, foo company]" - ] - }, - "execution_count": 59, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clean_out.sort_values(by=\"col\").reset_index(drop=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "e37e8fc1-b358-44ab-a265-83463267099e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
col
0[bar limited, limited bar]
1[bar limited, limited bar]
2[baz]
3[company qux]
4[foo, foo company]
\n", - "
" - ], - "text/plain": [ - " col\n", - "0 [bar limited, limited bar]\n", - "1 [bar limited, limited bar]\n", - "2 [baz]\n", - "3 [company qux]\n", - "4 [foo, foo company]" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cleaned.sort_values(by=\"col\").reset_index(drop=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "9e817ff6-7465-4c5f-a62f-fef85a882550", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\"\\n regexp_replace(\\n \\n regexp_replace(\\n lower( col\\n0 [foo, foo co]\\n1 [bar ltd, ltd bar]\\n2 [bar ltd, ltd bar]\\n3 [baz]\\n4 [co qux]),\\n '\\\\b(co)\\\\b',\\n 'company',\\n 'g'\\n )\\n ,\\n '\\\\b(ltd)\\\\b',\\n 'limited',\\n 'g'\\n )\\n \"" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "steps.expand_abbreviations(dirty)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "e7556bd3-8f65-433d-934b-a6375c63cb9d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
col
0[foo, foo company]
1[bar limited, limited bar]
2[bar limited, limited bar]
3[baz]
4[company qux]
\n", - "
" - ], - "text/plain": [ - " col\n", - "0 [foo, foo company]\n", - "1 [bar limited, limited bar]\n", - "2 [bar limited, limited bar]\n", - "3 [baz]\n", - "4 [company qux]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_clean = cu.cleaning_function(expand_abbreviations_partial)\n", - "\n", - "test_clean(dirty, column=\"col\")" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "54e84359-7312-4729-93dd-dd405454409c", - "metadata": {}, - "outputs": [], - "source": [ - "df = duckdb.sql(\"\"\"\n", - " select\n", - " unnest(col) as col\n", - " from\n", - " dirty\n", - "\"\"\").df()" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "084a2726-3d73-4f4a-8bab-b2d5b1d27342", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
col
0foo
1foo co
2bar ltd
3ltd bar
4bar ltd
5ltd bar
6baz
7co qux
\n", - "
" - ], - "text/plain": [ - " col\n", - "0 foo\n", - "1 foo co\n", - "2 bar ltd\n", - "3 ltd bar\n", - "4 bar ltd\n", - "5 ltd bar\n", - "6 baz\n", - "7 co qux" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "45dc7e64-7a3a-4715-bce6-1d0084248f71", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
col
0foo
1foo company
2bar limited
3limited bar
4bar limited
5limited bar
6baz
7company qux
\n", - "
" - ], - "text/plain": [ - " col\n", - "0 foo\n", - "1 foo company\n", - "2 bar limited\n", - "3 limited bar\n", - "4 bar limited\n", - "5 limited bar\n", - "6 baz\n", - "7 company qux" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_clean(df, column=\"col\")" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "b5058833-d012-483e-a9db-ef1b368033ac", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
col
0[foo, foo company]
1[bar limited, limited bar]
2[bar limited, limited bar]
3[baz]
4[company qux]
\n", - "
" - ], - "text/plain": [ - " col\n", - "0 [foo, foo company]\n", - "1 [bar limited, limited bar]\n", - "2 [bar limited, limited bar]\n", - "3 [baz]\n", - "4 [company qux]" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cleaned" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "89d2704a-928b-4388-acb1-5ae36b6240b0", - "metadata": {}, - "outputs": [], - "source": [ - "test_func_2 = cu.unnest_renest(\n", - " cu.cleaning_function(steps.to_upper)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "b9537f26-76f5-405b-8691-e4232cdabee2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
col
0[CO QUX]
1[FOO, FOO CO]
2[BAR LTD, LTD BAR]
3[BAZ]
4[BAR LTD, LTD BAR]
\n", - "
" - ], - "text/plain": [ - " col\n", - "0 [CO QUX]\n", - "1 [FOO, FOO CO]\n", - "2 [BAR LTD, LTD BAR]\n", - "3 [BAZ]\n", - "4 [BAR LTD, LTD BAR]" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_func_2(dirty, column=\"col\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/engineering/WL_cleaningfunction.ipynb b/notebooks/engineering/WL_cleaningfunction.ipynb deleted file mode 100644 index 118184b..0000000 --- a/notebooks/engineering/WL_cleaningfunction.ipynb +++ /dev/null @@ -1,2828 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "e7f4f737-e548-47fa-8c47-d43b1da7fa14", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame, display\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "markdown", - "id": "c296c3ba-ec27-4880-ab15-e339abad93cf", - "metadata": {}, - "source": [ - "# 🧹Cleaning cleaning functions\n", - "\n", - "The company name cleaning function I've been working with explodes to 30GB in memory. It seriously shouldn't. Worth a refactor." - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "id": "d56249c2-d971-4e83-8337-4a321a63a31c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 78, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from cmf import locations as loc\n", - "from cmf.data import utils as du\n", - "from cmf.data.star import Star\n", - "from cmf.data.datasets import Dataset\n", - "from cmf.data.probabilities import Probabilities\n", - "from cmf.data.clusters import Clusters\n", - "from cmf.link.splink_linker import SplinkLinker\n", - "from cmf.config import link_pipeline, stopwords\n", - "from cmf.features.clean_complex import *\n", - "from cmf.features.clean_basic import *\n", - "\n", - "import splink.duckdb.comparison_library as cl\n", - "import splink.duckdb.comparison_template_library as ctl\n", - "\n", - "from dotenv import load_dotenv, find_dotenv\n", - "from pathlib import Path\n", - "import os\n", - "import duckdb\n", - "import pandas as pd\n", - "\n", - "dotenv_path = find_dotenv()\n", - "load_dotenv(dotenv_path)" - ] - }, - { - "cell_type": "markdown", - "id": "6b1f48ba-909b-49ac-86d8-c737308192f8", - "metadata": {}, - "source": [ - "## Setup\n", - "\n", - "Grab some data." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "50d0cbb8-2dce-473c-ac90-2a6911e55f71", - "metadata": {}, - "outputs": [], - "source": [ - "star = Star(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"STAR_TABLE\")\n", - ")\n", - "probabilities = Probabilities(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"PROBABILITIES_TABLE\"),\n", - " star = star\n", - ")\n", - "clusters = Clusters(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"CLUSTERS_TABLE\"),\n", - " star = star\n", - ")\n", - "# cl_x_exp=SplinkLinker.load(\n", - "# path=Path(loc.DATA_SUBDIR['raw'], 'ch_x_exp.pickle')\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "3e70080f-3545-49d3-934b-56ce675e3564", - "metadata": {}, - "outputs": [], - "source": [ - "# cl_x_exp = SplinkLinker(\n", - "# dataset = Dataset(\n", - "# star_id=54717,\n", - "# star=star\n", - "# ), \n", - "# probabilities=probabilities, \n", - "# clusters=clusters, \n", - "# n=2\n", - "# )\n", - "# cl_x_exp.get_data(\n", - "# cluster_select={\n", - "# '\"companieshouse\".\"companies\"': [\n", - "# \"company_name as company_name\",\n", - "# \"postcode as postcode\"\n", - "# ]\n", - "# },\n", - "# dim_select=[\n", - "# \"id\",\n", - "# \"company_name\",\n", - "# \"postcode\"\n", - "# ]\n", - "# )\n", - "# cl_x_exp.save(path=Path(loc.DATA_SUBDIR['raw'], 'ch_x_exp.pickle'))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "99674caf-f34e-4f5d-8ddc-d65dd2ad5afd", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:1410: RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to \"sqlalchemy<2.0\". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n", - " meta = MetaData(self.connectable, schema=schema)\n" - ] - } - ], - "source": [ - "# df = cl_x_exp.dim_raw.sample(int(1e4))\n", - "df = Dataset(\n", - " selector=1970,\n", - " star=star\n", - ").read_dim(sample=0.05)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "4bf39985-282e-4495-a6dc-0f681e73dcb6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:1410: RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to \"sqlalchemy<2.0\". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n", - " meta = MetaData(self.connectable, schema=schema)\n" - ] - } - ], - "source": [ - "# df = cl_x_exp.dim_raw.sample(int(1e4))\n", - "df_lrg = Dataset(\n", - " selector=1970,\n", - " star=star\n", - ").read_dim()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "329e150e-1334-46f6-b750-ee5f74c7d176", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namecompany_numbercare_ofpo_boxaddress_line_1address_line_2post_towncountycountry...previous_name_7previous_name_8_change_dateprevious_name_8previous_name_9_change_dateprevious_name_9previous_name_10_change_dateprevious_name_10conf_statement_next_due_dateconf_statement_last_made_up_datepublish_date
56514509438BROMPTON LODGE CARE LTD14509438132 BROMPTON LANEROCHESTERENGLAND...11/12/20232023-09-01
189608643687LONGFORTH FARM MANAGEMENT COMPANY LIMITED08643687QUEENSWAY HOUSE11 QUEENSWAYNEW MILTONHAMPSHIREENGLAND...05/08/202422/07/20232023-09-01
1061NI691803FAIRBURN FITNESS LTDNI69180326 LINENHALL STREET, 1ST FLOORLINENHALL EXCHANGEBELFASTNORTHERN IRELAND...25/10/20232023-09-01
\n", - "

3 rows × 57 columns

\n", - "
" - ], - "text/plain": [ - " id company_name company_number \\\n", - "565 14509438 BROMPTON LODGE CARE LTD 14509438 \n", - "1896 08643687 LONGFORTH FARM MANAGEMENT COMPANY LIMITED 08643687 \n", - "1061 NI691803 FAIRBURN FITNESS LTD NI691803 \n", - "\n", - " care_of po_box address_line_1 address_line_2 \\\n", - "565 132 BROMPTON LANE \n", - "1896 QUEENSWAY HOUSE 11 QUEENSWAY \n", - "1061 26 LINENHALL STREET, 1ST FLOOR LINENHALL EXCHANGE \n", - "\n", - " post_town county country ... previous_name_7 \\\n", - "565 ROCHESTER ENGLAND ... \n", - "1896 NEW MILTON HAMPSHIRE ENGLAND ... \n", - "1061 BELFAST NORTHERN IRELAND ... \n", - "\n", - " previous_name_8_change_date previous_name_8 previous_name_9_change_date \\\n", - "565 \n", - "1896 \n", - "1061 \n", - "\n", - " previous_name_9 previous_name_10_change_date previous_name_10 \\\n", - "565 \n", - "1896 \n", - "1061 \n", - "\n", - " conf_statement_next_due_date conf_statement_last_made_up_date \\\n", - "565 11/12/2023 \n", - "1896 05/08/2024 22/07/2023 \n", - "1061 25/10/2023 \n", - "\n", - " publish_date \n", - "565 2023-09-01 \n", - "1896 2023-09-01 \n", - "1061 2023-09-01 \n", - "\n", - "[3 rows x 57 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.sample(3)" - ] - }, - { - "cell_type": "markdown", - "id": "aa4db9f8-8cd8-4eaf-883a-6353a0d9100d", - "metadata": {}, - "source": [ - "## Unit test\n", - "\n", - "Scratch for making one." - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "c6cf574b-4dc7-49ce-960e-d8a62e1d29ea", - "metadata": {}, - "outputs": [], - "source": [ - "import ast\n", - "\n", - "def load_test_data(path):\n", - " dirty = pd.read_csv(\n", - " Path(path, \"dirty.csv\"), \n", - " converters={\"list\": ast.literal_eval}\n", - " )\n", - " clean = pd.read_csv(\n", - " Path(path, \"clean.csv\"), \n", - " converters={\"list\": ast.literal_eval}\n", - " )\n", - " dirty.columns = [\"col\"]\n", - " clean.columns = [\"col\"]\n", - "\n", - " return dirty, clean\n", - "\n", - "array_except_partial = partial(array_except, terms_to_remove=[\"ltd\", \"plc\"])\n", - "\n", - "dirty, clean = load_test_data(\n", - " Path(loc.PROJECT_DIR, \"test\", \"features\", \"expand_abbreviations\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "87f25f18-3c59-4bdf-9f31-c3c4c7570bef", - "metadata": {}, - "outputs": [], - "source": [ - "def expand_abbreviations(input_column, replacements):\n", - " \"\"\"\n", - " Expand abbreviations passed as a dictionary where the keys are matches\n", - " and the values are what to replace them with.\n", - "\n", - " Matches only when term is surrounded by regex word boundaries.\n", - " \n", - " Arguments: \n", - " input_column: the name of the column to clean\n", - " replacements: a dictionary where keys are matches and values are\n", - " what the replace them with\n", - " \n", - " Returns: string to insert into SQL query\n", - " \"\"\"\n", - " replace_stack = \"\"\n", - " for i, (match, replacement) in enumerate(replacements.items()):\n", - " if i == 0: \n", - " replace_stack = rf\"\"\"\n", - " regexp_replace(\n", - " lower({input_column}),\n", - " '\\b({match})\\b',\n", - " '{replacement}',\n", - " 'g'\n", - " )\n", - " \"\"\"\n", - " else:\n", - " replace_stack = rf\"\"\"\n", - " regexp_replace(\n", - " {replace_stack},\n", - " '\\b({match})\\b',\n", - " '{replacement}',\n", - " 'g'\n", - " )\n", - " \"\"\"\n", - " \n", - " return replace_stack" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "ff00e799-ddf7-4a3b-99b5-a80181a3dff7", - "metadata": {}, - "outputs": [], - "source": [ - "expand_abbreviations_partial = partial(\n", - " expand_abbreviations,\n", - " replacements = {\n", - " \"co\": \"company\",\n", - " \"ltd\": \"limited\",\n", - " \"baz\": \"bazinga\"\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "id": "75c2c7eb-a9eb-42c9-86ad-d02e6a98b5c4", - "metadata": {}, - "outputs": [], - "source": [ - "def passthrough(input_column):\n", - " \"\"\"\n", - " A passthrough cleaning function to help test more complex building functions.\n", - " \"\"\"\n", - " return f\"{input_column}\"" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "id": "79bd8756-fc24-4aa5-b879-ad9e0ec7014f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌─────────────┐\n", - "│ col │\n", - "│ varchar │\n", - "├─────────────┤\n", - "│ foo co │\n", - "│ bar co inc │\n", - "│ baz co co │\n", - "│ quxco │\n", - "│ quux ltd co │\n", - "│ ltdcorge │\n", - "└─────────────┘" - ] - }, - "execution_count": 72, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(rf\"\"\"\n", - " select\n", - " {passthrough(\"col\")} as col\n", - " from\n", - " dirty\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "a27318a7-b8aa-4f51-bdfa-4587eb015d50", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────────────┐\n", - "│ col │\n", - "│ varchar │\n", - "├──────────────────────┤\n", - "│ foo company │\n", - "│ bar company inc │\n", - "│ baz company company │\n", - "│ quxcompany │\n", - "│ quux ltd company │\n", - "│ ltdcorge │\n", - "└──────────────────────┘" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(rf\"\"\"\n", - " select\n", - " regexp_replace(\n", - " lower(\"col\"),\n", - " '(co\\s|co$)',\n", - " 'company ',\n", - " 'g'\n", - " ) as col\n", - " from\n", - " dirty\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "487dcdfa-a528-4fee-b3a7-7c3f2c8076be", - "metadata": {}, - "source": [ - "## Pipeline\n", - "\n", - "Testing how we can make stuff using the duckdb factory, and therefore unit testing only the basic versions of functions." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6c44d28d-2c43-4fa0-9443-f230b2eb4235", - "metadata": {}, - "outputs": [], - "source": [ - "df_lrg = duckdb.sql(\"\"\"\n", - " select\n", - " company_name, \n", - " [company_name[:10], company_name[10:]] as secondary_names,\n", - " company_number\n", - " from\n", - " df_lrg\n", - "\"\"\").df()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "6d221407-565f-4a69-88c2-8821e0e604be", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
company_namesecondary_namescompany_number
3597888P AND A HODGES LIMITED[P AND A HO, ODGES LIMITED]07133996
1448594DRIP N DRY LTD[DRIP N DRY, Y LTD]13345001
750611BRAMWELL BROWN LIMITED[BRAMWELL B, BROWN LIMITED]08504514
4780078THE GREEN ROOM BOUTIQUE LIMITED[THE GREEN , ROOM BOUTIQUE LIMITED]13658823
1988230GMTK MANAGEMENT LTD[GMTK MANAG, GEMENT LTD]09662611
\n", - "
" - ], - "text/plain": [ - " company_name \\\n", - "3597888 P AND A HODGES LIMITED \n", - "1448594 DRIP N DRY LTD \n", - "750611 BRAMWELL BROWN LIMITED \n", - "4780078 THE GREEN ROOM BOUTIQUE LIMITED \n", - "1988230 GMTK MANAGEMENT LTD \n", - "\n", - " secondary_names company_number \n", - "3597888 [P AND A HO, ODGES LIMITED] 07133996 \n", - "1448594 [DRIP N DRY, Y LTD] 13345001 \n", - "750611 [BRAMWELL B, BROWN LIMITED] 08504514 \n", - "4780078 [THE GREEN , ROOM BOUTIQUE LIMITED] 13658823 \n", - "1988230 [GMTK MANAG, GEMENT LTD] 09662611 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_lrg.sample(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "f2eb2e2d-f7c9-4a4f-a1b4-387575c30ab0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 13min 43s, sys: 8.26 s, total: 13min 51s\n", - "Wall time: 6min 24s\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
company_namecompany_numbersecondary_names
0goberub13404790[goberub l, None]
1nspired investmentsSC606050[nspired i, investments]
2nvertd designs09152972[nvertd de, esigns]
3yozo fass02714021[yozo fass, s]
4bora 213220580[bora 2, 2]
............
5393601zeenu14458541[zeenu limi, ited]
5393602zeeshan shafqat 79914816987[zeeshan sh, hafqat 799]
5393603zeestar12600587[zeestar li, imited]
5393604zeezo14364849[zeezo limi, ited]
5393605zegura11782185[zegura, d]
\n", - "

5393606 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " company_name company_number secondary_names\n", - "0 goberub 13404790 [goberub l, None]\n", - "1 nspired investments SC606050 [nspired i, investments]\n", - "2 nvertd designs 09152972 [nvertd de, esigns]\n", - "3 yozo fass 02714021 [yozo fass, s]\n", - "4 bora 2 13220580 [bora 2, 2]\n", - "... ... ... ...\n", - "5393601 zeenu 14458541 [zeenu limi, ited]\n", - "5393602 zeeshan shafqat 799 14816987 [zeeshan sh, hafqat 799]\n", - "5393603 zeestar 12600587 [zeestar li, imited]\n", - "5393604 zeezo 14364849 [zeezo limi, ited]\n", - "5393605 zegura 11782185 [zegura, d]\n", - "\n", - "[5393606 rows x 3 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%time\n", - "clean_comp_names(df_lrg, \"company_name\", \"secondary_names\")" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "f9057f33-c8ce-4da8-ad98-8a17799267cc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nest_idcompany_namesecondary_namescompany_number
015 DAY BLINDS LIMITED5 DAY BLIN08294716
115 DAY BLINDS LIMITEDNDS LIMITED08294716
225 DE PARYS LTD5 DE PARYS08046339
325 DE PARYS LTDS LTD08046339
435 DE VERE GARDENS LTD5 DE VERE13930524
\n", - "
" - ], - "text/plain": [ - " nest_id company_name secondary_names company_number\n", - "0 1 5 DAY BLINDS LIMITED 5 DAY BLIN 08294716\n", - "1 1 5 DAY BLINDS LIMITED NDS LIMITED 08294716\n", - "2 2 5 DE PARYS LTD 5 DE PARYS 08046339\n", - "3 2 5 DE PARYS LTD S LTD 08046339\n", - "4 3 5 DE VERE GARDENS LTD 5 DE VERE 13930524" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "unnest = duckdb.sql(f\"\"\"\n", - "select\n", - " row_number() over () as nest_id,\n", - " *\n", - " replace (unnest(secondary_names) as secondary_names)\n", - "from\n", - " df2;\n", - "\"\"\").df()\n", - "unnest.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "98898d6b-4bd1-42b4-86f3-418f7c44509f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nest_idcompany_namesecondary_namescompany_number
015 day blinds5 DAY BLIN08294716
115 day blindsNDS LIMITED08294716
225 de parys5 DE PARYS08046339
325 de parysS LTD08046339
435 de vere gardens5 DE VERE13930524
\n", - "
" - ], - "text/plain": [ - " nest_id company_name secondary_names company_number\n", - "0 1 5 day blinds 5 DAY BLIN 08294716\n", - "1 1 5 day blinds NDS LIMITED 08294716\n", - "2 2 5 de parys 5 DE PARYS 08046339\n", - "3 2 5 de parys S LTD 08046339\n", - "4 3 5 de vere gardens 5 DE VERE 13930524" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "processed = clean_primary(unnest, \"company_name\")\n", - "processed.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "66790b19-813f-46ee-9774-5e7a9325b2a3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'any_value(company_name), any_value(secondary_names), any_value(company_number)'" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\", \".join([f\"any_value({col})\" for col in processed.columns if col != 'nest_id'])" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "id": "10e6b332-1c45-4913-8959-cdf920227f7c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────────────────────────────┬───────────────────────────┬───────────────────────────────────────────────────┐\n", - "│ any_value(company_name) │ any_value(company_number) │ secondary_names │\n", - "│ varchar │ varchar │ varchar[] │\n", - "├──────────────────────────────────────┼───────────────────────────┼───────────────────────────────────────────────────┤\n", - "│ 5 DAY BLINDS LIMITED │ 08294716 │ [5 DAY BLIN, NDS LIMITED] │\n", - "│ 5 DE PARYS LTD │ 08046339 │ [5 DE PARYS, S LTD] │\n", - "│ 5 DE VERE GARDENS LTD │ 13930524 │ [5 DE VERE , GARDENS LTD] │\n", - "│ 5 DE VERE GARDENS MANAGEMENT COMPA… │ 02490721 │ [5 DE VERE , GARDENS MANAGEMENT COMPANY LIMITED] │\n", - "│ 5 DEEP LIMITED │ 04190451 │ [5 DEEP LIM, MITED] │\n", - "│ 5 DEGREES FALMOUTH LIMITED │ 06902635 │ [5 DEGREES , FALMOUTH LIMITED] │\n", - "│ 5 DEGREES WEST MOTORBOAT TRAINING … │ 10717438 │ [5 DEGREES , WEST MOTORBOAT TRAINING LTD] │\n", - "│ 5 DEGREES WESTWARDS LTD │ 13171200 │ [5 DEGREES , WESTWARDS LTD] │\n", - "│ 5 DEMPSTER ROAD MANAGEMENT COMPANY… │ 04314012 │ [5 DEMPSTER, R ROAD MANAGEMENT COMPANY LIMITED] │\n", - "│ 5 DENMARK TERRACE BRIGHTON LIMITED │ 03620115 │ [5 DENMARK , TERRACE BRIGHTON LIMITED] │\n", - "│ · │ · │ · │\n", - "│ · │ · │ · │\n", - "│ · │ · │ · │\n", - "│ VOUTIQUE LTD │ 07850974 │ [VOUTIQUE L, LTD] │\n", - "│ NURSURYLAND(LONDON)LIMITED │ 00388419 │ [NURSURYLAN, ND(LONDON)LIMITED] │\n", - "│ S & K RETAIL LTD │ 10119343 │ [S & K RETA, AIL LTD] │\n", - "│ SHALINI PRIVATE LIMITED │ 14225350 │ [SHALINI PR, RIVATE LIMITED] │\n", - "│ UE WILKES LTD │ 13940041 │ [UE WILKES , LTD] │\n", - "│ S & K SCROWTHER LIMITED │ 04507842 │ [S & K SCRO, OWTHER LIMITED] │\n", - "│ QUBIS TECHNOLOGIES LTD │ 11708293 │ [QUBIS TECH, HNOLOGIES LTD] │\n", - "│ PETER BATTY PRODUCTIONS LIMITED │ 00964477 │ [PETER BATT, TY PRODUCTIONS LIMITED] │\n", - "│ STOIQ LIMITED │ 14503956 │ [STOIQ LIMI, ITED] │\n", - "│ STORM LEGAL LIMITED │ 12481965 │ [STORM LEGA, AL LIMITED] │\n", - "├──────────────────────────────────────┴───────────────────────────┴───────────────────────────────────────────────────┤\n", - "│ 2907 rows (20 shown) 3 columns │\n", - "└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(f\"\"\"\n", - "select\n", - " any_value(company_name), \n", - " any_value(company_number),\n", - " list(secondary_names) as secondary_names\n", - "from\n", - " unnest\n", - "group by nest_id;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "58f3de8e-d83a-4be1-9ddc-582a71a13af6", - "metadata": {}, - "outputs": [ - { - "ename": "ParserException", - "evalue": "Parser Error: syntax error at or near \"replace\"\nLINE 4: replace (list(...\n ^", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mParserException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[37], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m renest \u001b[38;5;241m=\u001b[39m \u001b[43mduckdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124;43mselect\u001b[39;49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;43m any_value(*)\u001b[39;49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;43m replace (list(secondary_names) as secondary_names)\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;43mfrom\u001b[39;49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124;43m unnest\u001b[39;49m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124;43mgroup by nest_id;\u001b[39;49m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mdf()\n\u001b[1;32m 9\u001b[0m renest\u001b[38;5;241m.\u001b[39mhead(\u001b[38;5;241m5\u001b[39m)\n", - "\u001b[0;31mParserException\u001b[0m: Parser Error: syntax error at or near \"replace\"\nLINE 4: replace (list(...\n ^" - ] - } - ], - "source": [ - "renest = duckdb.sql(f\"\"\"\n", - "select\n", - " *\n", - " replace (list(secondary_names) as secondary_names)\n", - "from\n", - " unnest\n", - "group by nest_id;\n", - "\"\"\").df()\n", - "renest.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "id": "803e40ff-af4a-47e7-b9da-ae3a0415ec41", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
col
0foo company
1foo company
2barxco
3bar3co
4baz limited
5qux
6quux uk corp
\n", - "
" - ], - "text/plain": [ - " col\n", - "0 foo company\n", - "1 foo company\n", - "2 barxco\n", - "3 bar3co\n", - "4 baz limited\n", - "5 qux\n", - "6 quux uk corp" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dirty, clean = load_test_data(\n", - " Path(\n", - " loc.PROJECT_DIR, \n", - " \"test\", \n", - " \"features\", \n", - " \"duckdb_cleaning_factory\", \n", - " \"clean_comp_names\"\n", - " )\n", - ")\n", - "cleaning_func = duckdb_cleaning_factory(\n", - " [\n", - " clean_punctuation,\n", - " expand_abbreviations,\n", - " tokenise,\n", - " array_except_partial,\n", - " list_join_to_string\n", - " ]\n", - ")\n", - "cleaning_func(dirty, 'col')" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "id": "50491e18-7286-4a2b-886d-96f5a8cf5696", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
col
0foo§co
1foo co
2barxco
3bar3co
4baz ltd
5qux. Plc
\n", - "
" - ], - "text/plain": [ - " col\n", - "0 foo§co\n", - "1 foo co\n", - "2 barxco\n", - "3 bar3co\n", - "4 baz ltd\n", - "5 qux. Plc" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dirty" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "9de68094-85db-438a-94c1-1d9255b8735e", - "metadata": {}, - "outputs": [], - "source": [ - "from functools import partial\n", - "\n", - "remove_stopwords = partial(array_except, terms_to_remove=stopwords)\n", - "\n", - "clean_primary = duckdb_cleaning_factory(\n", - " [\n", - " clean_company_name,\n", - " remove_stopwords,\n", - " list_join_to_string,\n", - " ]\n", - ")\n", - "clean_secondary = unnest_renest(clean_primary)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "23c74f00-64a0-4c9e-926f-abc3f223a584", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namecompany_numbercare_ofpo_boxaddress_line_1address_line_2post_towncountycountry...previous_name_7previous_name_8_change_dateprevious_name_8previous_name_9_change_dateprevious_name_9previous_name_10_change_dateprevious_name_10conf_statement_next_due_dateconf_statement_last_made_up_datepublish_date
113207661954freddy foxtrots vintage emporium0766195418 TOP ENDRENHOLDBEDFORDENGLAND...22/06/202408/06/20232023-09-01
2315023369aaa gas engineers extensions1502336920 WENLOCK ROADLONDONENGLAND...06/08/20242023-09-01
149614512479i security services i sec1451247946 HOUGHTON PLACEBRADFORDWEST YORKSHIREUNITED KINGDOM...12/12/20232023-09-01
\n", - "

3 rows × 57 columns

\n", - "
" - ], - "text/plain": [ - " id company_name company_number care_of \\\n", - "1132 07661954 freddy foxtrots vintage emporium 07661954 \n", - "23 15023369 aaa gas engineers extensions 15023369 \n", - "1496 14512479 i security services i sec 14512479 \n", - "\n", - " po_box address_line_1 address_line_2 post_town county \\\n", - "1132 18 TOP END RENHOLD BEDFORD \n", - "23 20 WENLOCK ROAD LONDON \n", - "1496 46 HOUGHTON PLACE BRADFORD WEST YORKSHIRE \n", - "\n", - " country ... previous_name_7 previous_name_8_change_date \\\n", - "1132 ENGLAND ... \n", - "23 ENGLAND ... \n", - "1496 UNITED KINGDOM ... \n", - "\n", - " previous_name_8 previous_name_9_change_date previous_name_9 \\\n", - "1132 \n", - "23 \n", - "1496 \n", - "\n", - " previous_name_10_change_date previous_name_10 \\\n", - "1132 \n", - "23 \n", - "1496 \n", - "\n", - " conf_statement_next_due_date conf_statement_last_made_up_date \\\n", - "1132 22/06/2024 08/06/2023 \n", - "23 06/08/2024 \n", - "1496 12/12/2023 \n", - "\n", - " publish_date \n", - "1132 2023-09-01 \n", - "23 2023-09-01 \n", - "1496 2023-09-01 \n", - "\n", - "[3 rows x 57 columns]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clean_primary(df, \"company_name\").sample(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "008dacad-87c0-4863-809e-8c4f98a36202", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
company_namesecondary_namescompany_number
2783SUNCH CONSULTING LTD[sunch cons, sulting]14731169
1648CUSTOM HOUSE FLATS MANAGEMENT COMPANY (ST IVES...[custom hou, use flats management st ives]02547194
647CORNELIUS CAPITAL LIMITED[cornelius, capital]14645653
\n", - "
" - ], - "text/plain": [ - " company_name \\\n", - "2783 SUNCH CONSULTING LTD \n", - "1648 CUSTOM HOUSE FLATS MANAGEMENT COMPANY (ST IVES... \n", - "647 CORNELIUS CAPITAL LIMITED \n", - "\n", - " secondary_names company_number \n", - "2783 [sunch cons, sulting] 14731169 \n", - "1648 [custom hou, use flats management st ives] 02547194 \n", - "647 [cornelius, capital] 14645653 " - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clean_secondary(df2, \"secondary_names\").sample(3)" - ] - }, - { - "cell_type": "markdown", - "id": "e6db892e-453a-4b5a-a22e-2bc24f83512e", - "metadata": {}, - "source": [ - "## Experiments" - ] - }, - { - "cell_type": "markdown", - "id": "5be4c3e6-d3ce-4dd9-a310-6a04e6a8c40d", - "metadata": {}, - "source": [ - "What does this function actually do?\n", - "\n", - "* Standard clean of company name, returns tokens in an array\n", - "* Standard clean of an array of company's second names -- this as array of arrays, presumably\n", - "* Removes stopwords from the cleaned names\n", - " * By joining in the stopwords to EVERY ROW\n", - "* Adds lists of terms removed etc (with pandas functions)\n", - "\n", - "I think we can make it way more efficient by overwriting columns, keeping it in duckdb, and ditching columns that aren't needed in prod." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "82bbdc32-4615-462e-929d-2685b28488c8", - "metadata": {}, - "outputs": [], - "source": [ - "sec_df = duckdb.sql(\"\"\"\n", - " select\n", - " *,\n", - " [company_name, company_name] as secondary_names\n", - " from\n", - " df;\n", - "\"\"\").df()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "ccd3d84a-8baf-4df3-87b4-ed79524cf5b4", - "metadata": {}, - "outputs": [], - "source": [ - "def array_except(input_col_name, terms_to_remove):\n", - " return rf\"\"\"\n", - " array_filter(\n", - " {input_col_name},\n", - " x -> not array_contains({terms_to_remove}, x)\n", - " )\n", - " \"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "fbb37824-389b-4a7a-afb7-3ab99a83e913", - "metadata": {}, - "outputs": [], - "source": [ - "def array_except(input_col_name, terms_to_remove):\n", - " return rf\"\"\"\n", - " array_filter(\n", - " {input_col_name},\n", - " x -> not array_contains({terms_to_remove}, x)\n", - " )\n", - " \"\"\"\n", - "\n", - "def clean_comp_names(\n", - " df, primary_col: str, secondary_col: str = None, stopwords: str = stopwords\n", - "):\n", - "\n", - " clean_and_stopwords_primary_sql = f\"\"\"\n", - " select\n", - " *\n", - " replace (\n", - " {list_join_to_string(\n", - " array_except(\n", - " clean_company_name(primary_col), \n", - " stopwords\n", - " )\n", - " )}\n", - " as {primary_col}\n", - " )\n", - " from\n", - " df;\n", - " \"\"\"\n", - " \n", - " if secondary_col is not None:\n", - " unnest_sql = f\"\"\"\n", - " select\n", - " *\n", - " replace (unnest({secondary_col}) as {secondary_col})\n", - " from\n", - " df;\n", - " \"\"\"\n", - " clean_and_stopwords_secondary_sql = f\"\"\"\n", - " select\n", - " *\n", - " replace (\n", - " {list_join_to_string(\n", - " array_except(\n", - " clean_company_name(secondary_col), \n", - " stopwords\n", - " )\n", - " )}\n", - " as {secondary_col}\n", - " )\n", - " from\n", - " df;\n", - " \"\"\"\n", - " renest_sql = f\"\"\"\n", - " select\n", - " *\n", - " replace (list({secondary_col}) as {secondary_col})\n", - " from\n", - " df\n", - " group by all;\n", - " \"\"\"\n", - " to_run = [\n", - " unnest_sql, \n", - " clean_and_stopwords_secondary_sql,\n", - " renest_sql,\n", - " clean_and_stopwords_primary_sql\n", - " ]\n", - " else:\n", - " to_run = [\n", - " clean_and_stopwords_primary_sql\n", - " ]\n", - "\n", - " for sql in to_run:\n", - " df = duckdb.sql(sql).df()\n", - "\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "0b8ec261-a445-4206-afa7-7cfa743eda97", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namepostcode
01523028gemini trading nottmNG16 3SU
11029738exel technologyNG18 5FU
2898745dominic schusterGU21 2LX
32656450ocompany toolsL33 7TW
43274294montagne jeunesseSA12 7AX
............
99951509290radha suppliesSN4 0AW
99962423214relay floor systemsWS13 6PY
99972011906echo brand communicationsBH21 7UH
99982857066poclain hydraulicsPE8 4HN
9999389039edina manufacturingBT28 2RE
\n", - "

10000 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " id company_name postcode\n", - "0 1523028 gemini trading nottm NG16 3SU\n", - "1 1029738 exel technology NG18 5FU\n", - "2 898745 dominic schuster GU21 2LX\n", - "3 2656450 ocompany tools L33 7TW\n", - "4 3274294 montagne jeunesse SA12 7AX\n", - "... ... ... ...\n", - "9995 1509290 radha supplies SN4 0AW\n", - "9996 2423214 relay floor systems WS13 6PY\n", - "9997 2011906 echo brand communications BH21 7UH\n", - "9998 2857066 poclain hydraulics PE8 4HN\n", - "9999 389039 edina manufacturing BT28 2RE\n", - "\n", - "[10000 rows x 3 columns]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clean_comp_names(\n", - " df,\n", - " primary_col=\"company_name\",\n", - " secondary_col=None,\n", - " stopwords=stopwords\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "8eb375a0-c11b-47bc-a66c-ee643697ac39", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namepostcodesecondary_names
0898745dominic schusterGU21 2LX[dominic schuster, dominic schuster]
1135087partners design consultantsEC1M 6BM[partners design consultants, partners design ...
2133562alliance wineKA15 1LN[alliance wine, alliance wine]
3177081051parcelE3 3QR[51parcel, 51parcel]
42142513babble cloudEC3A 5AR[babble cloud, babble cloud]
...............
99952773496am digitalWN6 9RD[am digital, am digital]
99961970987lewis antony richardcharlesNG18 4TW[lewis antony richardcharles, lewis antony ric...
9997983787metocean telematicsPO15 7AB[metocean telematics, metocean telematics]
99982689722zakas dimitriosAB10 1ZP[zakas dimitrios, zakas dimitrios]
99991366131west london nhs trustUB2 4SA[west london nhs trust, west london nhs trust]
\n", - "

10000 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " id company_name postcode \\\n", - "0 898745 dominic schuster GU21 2LX \n", - "1 135087 partners design consultants EC1M 6BM \n", - "2 133562 alliance wine KA15 1LN \n", - "3 1770810 51parcel E3 3QR \n", - "4 2142513 babble cloud EC3A 5AR \n", - "... ... ... ... \n", - "9995 2773496 am digital WN6 9RD \n", - "9996 1970987 lewis antony richardcharles NG18 4TW \n", - "9997 983787 metocean telematics PO15 7AB \n", - "9998 2689722 zakas dimitrios AB10 1ZP \n", - "9999 1366131 west london nhs trust UB2 4SA \n", - "\n", - " secondary_names \n", - "0 [dominic schuster, dominic schuster] \n", - "1 [partners design consultants, partners design ... \n", - "2 [alliance wine, alliance wine] \n", - "3 [51parcel, 51parcel] \n", - "4 [babble cloud, babble cloud] \n", - "... ... \n", - "9995 [am digital, am digital] \n", - "9996 [lewis antony richardcharles, lewis antony ric... \n", - "9997 [metocean telematics, metocean telematics] \n", - "9998 [zakas dimitrios, zakas dimitrios] \n", - "9999 [west london nhs trust, west london nhs trust] \n", - "\n", - "[10000 rows x 4 columns]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clean_comp_names(\n", - " sec_df,\n", - " primary_col=\"company_name\",\n", - " secondary_col=\"secondary_names\",\n", - " stopwords=stopwords\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "420404b5-fa3b-4368-9e23-344b6135cafb", - "metadata": {}, - "outputs": [], - "source": [ - "cl_df = clean_comp_names(\n", - " cl_x_exp.cluster_raw,\n", - " primary_col=\"company_name\",\n", - " secondary_col=None,\n", - " stopwords=stopwords\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "9913465a-8584-45b4-bec6-8cb6b7987a78", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namepostcodesecondary_names
01523028GEMINI TRADING (NOTTM) LIMITEDNG16 3SUGEMINI TRADING (NOTTM) LIMITED
11523028GEMINI TRADING (NOTTM) LIMITEDNG16 3SUGEMINI TRADING (NOTTM) LIMITED
21029738EXEL TECHNOLOGY GROUP LTDNG18 5FUEXEL TECHNOLOGY GROUP LTD
31029738EXEL TECHNOLOGY GROUP LTDNG18 5FUEXEL TECHNOLOGY GROUP LTD
4898745DOMINIC SCHUSTER LIMITEDGU21 2LXDOMINIC SCHUSTER LIMITED
\n", - "
" - ], - "text/plain": [ - " id company_name postcode \\\n", - "0 1523028 GEMINI TRADING (NOTTM) LIMITED NG16 3SU \n", - "1 1523028 GEMINI TRADING (NOTTM) LIMITED NG16 3SU \n", - "2 1029738 EXEL TECHNOLOGY GROUP LTD NG18 5FU \n", - "3 1029738 EXEL TECHNOLOGY GROUP LTD NG18 5FU \n", - "4 898745 DOMINIC SCHUSTER LIMITED GU21 2LX \n", - "\n", - " secondary_names \n", - "0 GEMINI TRADING (NOTTM) LIMITED \n", - "1 GEMINI TRADING (NOTTM) LIMITED \n", - "2 EXEL TECHNOLOGY GROUP LTD \n", - "3 EXEL TECHNOLOGY GROUP LTD \n", - "4 DOMINIC SCHUSTER LIMITED " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "unnest = duckdb.sql(f\"\"\"\n", - " select\n", - " *\n", - " replace (unnest(secondary_names) as secondary_names)\n", - " from\n", - " sec_df;\n", - "\"\"\").df()\n", - "unnest.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "e9e6a54e-7a78-4cc1-a1f6-318a5b81f700", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namepostcodesecondary_names
01523028GEMINI TRADING (NOTTM) LIMITEDNG16 3SUgemini trading nottm
11523028GEMINI TRADING (NOTTM) LIMITEDNG16 3SUgemini trading nottm
21029738EXEL TECHNOLOGY GROUP LTDNG18 5FUexel technology
31029738EXEL TECHNOLOGY GROUP LTDNG18 5FUexel technology
4898745DOMINIC SCHUSTER LIMITEDGU21 2LXdominic schuster
\n", - "
" - ], - "text/plain": [ - " id company_name postcode secondary_names\n", - "0 1523028 GEMINI TRADING (NOTTM) LIMITED NG16 3SU gemini trading nottm\n", - "1 1523028 GEMINI TRADING (NOTTM) LIMITED NG16 3SU gemini trading nottm\n", - "2 1029738 EXEL TECHNOLOGY GROUP LTD NG18 5FU exel technology\n", - "3 1029738 EXEL TECHNOLOGY GROUP LTD NG18 5FU exel technology\n", - "4 898745 DOMINIC SCHUSTER LIMITED GU21 2LX dominic schuster" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clean_and_stopwords_secondary = duckdb.sql(f\"\"\"\n", - " select\n", - " *\n", - " replace (\n", - " {list_join_to_string(\n", - " array_except(\n", - " clean_company_name('secondary_names'), \n", - " stopwords\n", - " )\n", - " )}\n", - " as secondary_names\n", - " )\n", - " from\n", - " unnest;\n", - "\"\"\").df()\n", - "clean_and_stopwords_secondary.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "5777122f-cca7-4655-9191-67fcdc38d3d1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namepostcodesecondary_names
03274294MONTAGNE JEUNESSE INTERNATIONAL LIMITEDSA12 7AX[montagne jeunesse, montagne jeunesse]
12405408ARMASHIELD LIMITEDPO7 7XJ[armashield, armashield]
283891MARINE AND CHARTER SOLUTIONS LLPLL53 7AH[marine charter solutions, marine charter solu...
31981031TROUBADOR PUBLISHING LTDLE8 0RX[troubador publishing, troubador publishing]
42477306J HEEBINK (MANCHESTER) LIMITEDM16 0RJ[j heebink manchester, j heebink manchester]
\n", - "
" - ], - "text/plain": [ - " id company_name postcode \\\n", - "0 3274294 MONTAGNE JEUNESSE INTERNATIONAL LIMITED SA12 7AX \n", - "1 2405408 ARMASHIELD LIMITED PO7 7XJ \n", - "2 83891 MARINE AND CHARTER SOLUTIONS LLP LL53 7AH \n", - "3 1981031 TROUBADOR PUBLISHING LTD LE8 0RX \n", - "4 2477306 J HEEBINK (MANCHESTER) LIMITED M16 0RJ \n", - "\n", - " secondary_names \n", - "0 [montagne jeunesse, montagne jeunesse] \n", - "1 [armashield, armashield] \n", - "2 [marine charter solutions, marine charter solu... \n", - "3 [troubador publishing, troubador publishing] \n", - "4 [j heebink manchester, j heebink manchester] " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "renest = duckdb.sql(f\"\"\"\n", - " select\n", - " *\n", - " replace (list(secondary_names) as secondary_names)\n", - " from\n", - " clean_and_stopwords_secondary\n", - " group by all;\n", - "\"\"\").df()\n", - "renest.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "2a3875b5-2c49-4452-b7fe-2ba048bd8a7b", - "metadata": {}, - "outputs": [], - "source": [ - "primary_col = \"company_name\"\n", - "clean_primary_sql = f\"\"\"\n", - " select\n", - " *\n", - " replace ({clean_company_name(primary_col)} as {primary_col})\n", - " from\n", - " to_process;\n", - "\"\"\"\n", - "stopwords_primary_sql = f\"\"\"\n", - " select\n", - " *\n", - " replace (\n", - " {list_join_to_string(\n", - " array_except(\n", - " primary_col, \n", - " stopwords\n", - " )\n", - " )}\n", - " as {primary_col}\n", - " )\n", - " from\n", - " to_process;\n", - "\"\"\"\n", - "to_do = [clean_primary_sql, stopwords_primary_sql]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81840c98-8ce4-44c9-9487-ec3b33501a84", - "metadata": {}, - "outputs": [], - "source": [ - "to_process = df\n", - "for i in to_do:\n", - " to_process = duckdb.sql(i)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "9d7840cc-d6e7-4f4a-9100-c86decc22cc7", - "metadata": {}, - "outputs": [], - "source": [ - "x1 = duckdb.sql(f\"\"\"\n", - " select\n", - " *,\n", - " {clean_company_name(\"company_name\")} as name_clean,\n", - " {array_except(\"name_clean\", stopwords)} as name_without_stopwords,\n", - " {list_join_to_string(\"name_without_stopwords\")} as name_out\n", - " from\n", - " df;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "dae99bcd-b5de-4aed-b7ec-6fbe7d399011", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌─────────┬─────────────────────────────┬──────────┐\n", - "│ id │ company_name │ postcode │\n", - "│ int64 │ varchar │ varchar │\n", - "├─────────┼─────────────────────────────┼──────────┤\n", - "│ 258194 │ shanti hospitality │ SW1P 2PN │\n", - "│ 2090119 │ mywebtonet webhosting │ PO18 8EN │\n", - "│ 1568046 │ medina spares │ BB7 1QD │\n", - "│ 2983001 │ astronova │ SL6 3RT │\n", - "│ 459540 │ progressive motorsport │ NN13 7ES │\n", - "│ 3108194 │ soltechsupply │ CV31 1LW │\n", - "│ 2572987 │ cwt commodity logistics │ RM18 7EB │\n", - "│ 3362460 │ western air ducts │ BA11 2FD │\n", - "│ 2461809 │ anglo italian enterprises │ W1G 8NP │\n", - "│ 1551069 │ meir australia │ EC4V 4BE │\n", - "│ · │ · │ · │\n", - "│ · │ · │ · │\n", - "│ · │ · │ · │\n", - "│ 3429276 │ towerbrook capital partners │ SW1Y 4AH │\n", - "│ 2782615 │ transportify │ IP2 8LH │\n", - "│ 239146 │ schmitz cargobull │ WA4 4EZ │\n", - "│ 1069547 │ clive christian london │ SW1X 7XL │\n", - "│ 1180373 │ gary brown │ YO12 4PA │\n", - "│ 232102 │ ancompany catalysts │ B61 7EP │\n", - "│ 3208697 │ brand way food │ NW10 7AE │\n", - "│ 1491296 │ jo bird │ TA9 4RN │\n", - "│ 2039955 │ gardline │ NR30 3NG │\n", - "│ 1279838 │ r m electrical │ SO19 2PB │\n", - "├─────────┴─────────────────────────────┴──────────┤\n", - "│ ? rows (>9999 rows, 20 shown) 3 columns │\n", - "└──────────────────────────────────────────────────┘" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(f\"\"\"\n", - " select\n", - " * \n", - " exclude(name_clean, name_without_stopwords, name_out)\n", - " replace(name_out as company_name)\n", - " from\n", - " x1;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "a58ea0ec-1221-4a49-a8eb-3277906334db", - "metadata": {}, - "outputs": [], - "source": [ - "sec_df2 = duckdb.sql(f\"\"\"\n", - " select\n", - " *\n", - " replace (unnest(secondary_names) as secondary_names)\n", - " from\n", - " sec_df;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "2ae7b6b6-e45b-4960-96d3-62cfa6dc2a73", - "metadata": {}, - "outputs": [], - "source": [ - "sec_df3 = duckdb.sql(f\"\"\"\n", - " select\n", - " *\n", - " replace ({clean_company_name(\"secondary_names\")} as secondary_names)\n", - " from\n", - " sec_df2;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "eec4d939-95ea-4ff8-a168-3a2ea5c1200a", - "metadata": {}, - "outputs": [], - "source": [ - "sec_df4 = duckdb.sql(f\"\"\"\n", - " select\n", - " *\n", - " replace (\n", - " {\n", - " list_join_to_string(\n", - " array_except(\"secondary_names\", stopwords)\n", - " )\n", - " }\n", - " as secondary_names\n", - " )\n", - " from\n", - " sec_df3;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "6891b461-4e94-4e3f-8341-89d89947b5bc", - "metadata": {}, - "outputs": [], - "source": [ - "sec_df5 = duckdb.sql(f\"\"\"\n", - " select\n", - " *\n", - " replace (list(secondary_names) as secondary_names)\n", - " from\n", - " sec_df4\n", - " group by all;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "8ed4f1b9-08ab-4a89-9822-248578013efe", - "metadata": {}, - "outputs": [], - "source": [ - "def array_except(input_col_name, terms_to_remove):\n", - " return rf\"\"\"\n", - " array_filter(\n", - " {input_col_name},\n", - " x -> not array_contains({terms_to_remove}, x)\n", - " )\n", - " \"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "e5c7e93e-b0e4-414e-b502-0a92eea2a28a", - "metadata": {}, - "outputs": [], - "source": [ - "df2 = duckdb.sql(f\"\"\"\n", - " select\n", - " *\n", - " replace ({clean_company_name(\"company_name\")} as company_name)\n", - " from\n", - " df;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "efe14155-f93c-460e-ae18-833ae7f2c087", - "metadata": {}, - "outputs": [], - "source": [ - "df3 = duckdb.sql(f\"\"\"\n", - " select\n", - " *\n", - " replace (\n", - " {\n", - " list_join_to_string(\n", - " array_except(\"company_name\", stopwords)\n", - " )\n", - " }\n", - " as company_name\n", - " )\n", - " from\n", - " df2;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "7dba1858-6155-4612-b53a-2629f5fb1eac", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌─────────┬──────────────────────────────────────────────┬──────────┐\n", - "│ id │ company_name │ postcode │\n", - "│ int64 │ varchar │ varchar │\n", - "├─────────┼──────────────────────────────────────────────┼──────────┤\n", - "│ 2720694 │ lloyd julian │ NR6 7GA │\n", - "│ 647217 │ niels larsen │ WF5 0HP │\n", - "│ 505204 │ churchill fire │ EC2A 3QR │\n", - "│ 618395 │ buzz pinky │ PO9 2NA │\n", - "│ 3361781 │ t f tull │ WD18 8RH │\n", - "│ 650314 │ vct │ GU24 8HU │\n", - "│ 2310276 │ showerdrape std │ M17 1DB │\n", - "│ 249534 │ maquet │ NE35 9PZ │\n", - "│ 2321202 │ fiera capital iom │ IM1 1EU │\n", - "│ 2893212 │ nature s buddy │ SW17 0QF │\n", - "│ · │ · │ · │\n", - "│ · │ · │ · │\n", - "│ · │ · │ · │\n", - "│ 1957717 │ base childrenswear │ IG8 8HF │\n", - "│ 50716 │ rascal clothing │ EN11 0BE │\n", - "│ 2145000 │ gw wines │ WA14 4QF │\n", - "│ 1754977 │ jyw │ TA2 7AS │\n", - "│ 891327 │ digital print │ NN7 2EG │\n", - "│ 1624804 │ lff scotland │ AB32 6JL │\n", - "│ 2628894 │ dandara iom holdings │ IM2 2SA │\n", - "│ 1415928 │ ecom │ BD10 9TQ │\n", - "│ 1701108 │ shen zhen shi lang ma ke ji you xian gong si │ AB10 1ZP │\n", - "│ 1109511 │ millerbrown │ HD9 6EB │\n", - "├─────────┴──────────────────────────────────────────────┴──────────┤\n", - "│ ? rows (>9999 rows, 20 shown) 3 columns │\n", - "└───────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df3" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f45d9479-aac2-45ec-b710-dd5be1f0b9d4", - "metadata": {}, - "outputs": [], - "source": [ - "sql_clean_company_name = f\"\"\"\n", - " select\n", - " {clean_company_name(primary_col)} as company_name_arr,\n", - " {\n", - " f\"{clean_company_name(secondary_col)} as secondary_names_arr, \"\n", - " if secondary_col\n", - " else \"\"\n", - " }\n", - " *\n", - " from df\n", - "\"\"\"\n", - "names_cleaned = duckdb.sql(sql_clean_company_name) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ae4157cf-ab24-4d6e-85e9-3c7de3eb4e73", - "metadata": {}, - "outputs": [], - "source": [ - "\"function\": clean_comp_names,\n", - "\"arguments\": {\n", - " \"primary_col\": \"company_name\",\n", - " \"secondary_col\": None,\n", - " \"stopwords\": stopwords,\n", - "}," - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4c80dbd5-f446-4ce3-8196-f012652edf42", - "metadata": {}, - "outputs": [], - "source": [ - "\"function\": clean_comp_names,\n", - "\"arguments\": {\n", - " \"primary_col\": \"company_name\",\n", - " \"secondary_col\": None,\n", - " \"stopwords\": stopwords,\n", - "}," - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1fe4affc-4f6c-4660-81fd-540aacd15055", - "metadata": {}, - "outputs": [], - "source": [ - "def clean_comp_names(\n", - " df, primary_col: str, secondary_col: str = None, stopwords: str = stopwords\n", - "):\n", - " \"\"\"\n", - " Lower case, remove punctuation & tokenise the primary company name into an array.\n", - " Extract tokens into: 'unusual' and 'stopwords'. Dedupe. Sort alphabetically.\n", - " Untokenise the unusual words back to a string.\n", - "\n", - " Args:\n", - " df: a dataframe\n", - " primary_col: a column containing the company's main name\n", - " secondary_col: a column containing an array of the company's\n", - " secondary names\n", - " stopwords: a list of stopwords to use for this clean\n", - " Returns:\n", - " dataframe: company number, 'unusual' tokens', most common 3 tokens,\n", - " most common 4 to 6 tokens, list of previous names of company, postcode.\n", - " \"\"\"\n", - "\n", - " # TODO: Refactor the silly nested f-strings\n", - "\n", - " # CLEAN and TOKENISE\n", - " # To a new dataframe\n", - " sql_clean_company_name = f\"\"\"\n", - " select\n", - " {clean_company_name(primary_col)} as company_name_arr,\n", - " {\n", - " f\"{clean_company_name(secondary_col)} as secondary_names_arr, \"\n", - " if secondary_col\n", - " else \"\"\n", - " }\n", - " *\n", - " from df\n", - " \"\"\"\n", - " names_cleaned = duckdb.sql(sql_clean_company_name) # noqa:F841\n", - "\n", - " # Define STOPWORDS\n", - " # And join them in\n", - " stopword_tokens = pd.DataFrame({\"token_array\": [stopwords]}) # noqa:F841\n", - " sql_companies_arr_with_top = \"\"\"\n", - " select\n", - " *,\n", - " (select * from stopword_tokens) as stopwords\n", - " from names_cleaned\n", - " \"\"\"\n", - " with_common_terms = duckdb.sql(sql_companies_arr_with_top) # noqa:F841\n", - "\n", - " # EXTRACT the UNUSUAL and STOPWORD tokens\n", - " # We want the weird stuff from company names\n", - " # TODO: leave name_unusual_tokens (and secondary...) as array & remove split() below\n", - " def secondary_name_unusual_tokens():\n", - " # DuckDB needs a refactor, sorry\n", - " return list_join_to_string(array_except(\"secondary_names_arr\", \"stopwords\"))\n", - "\n", - " def cat_names_tokens_stopwords(primary_arr, secondary_arr, stopwords):\n", - " # DuckDB needs a refactor, sorry\n", - " # return array_intersect(\"secondary_names_arr\", \"stopwords\")\n", - " primary = rf\"{array_intersect(primary_arr, stopwords)}\"\n", - " secondary = rf\"{array_intersect(primary_arr, stopwords)}\"\n", - "\n", - " if secondary_arr:\n", - " return rf\"\"\"\n", - " array_cat(\n", - " {primary},\n", - " {secondary}\n", - " )\n", - " \"\"\"\n", - " else:\n", - " return rf\"{primary}\"\n", - "\n", - " sql_manipulate_arrays = f\"\"\"\n", - " select\n", - " *,\n", - " {\n", - " list_join_to_string(\n", - " array_except(\"company_name_arr\", \"stopwords\")\n", - " )\n", - " }\n", - " as name_unusual_tokens,\n", - " {\n", - " (\n", - " f\"{secondary_name_unusual_tokens()} \"\n", - " \"as secondary_name_unusual_tokens\"\n", - " )\n", - " if secondary_col\n", - " else \"\"\n", - " }\n", - " {\n", - " cat_names_tokens_stopwords(\n", - " \"company_name_arr\",\n", - " \"secondary_names_arr\",\n", - " stopwords\n", - " )\n", - " } as names_tokens_stopwords\n", - " from with_common_terms\n", - " \"\"\"\n", - " clean = duckdb.sql(sql_manipulate_arrays)\n", - "\n", - " clean_df = clean.df()\n", - "\n", - " # DEDUPE names_tokens_stopwords\n", - " clean_df[\"name_unusual_tokens\"] = clean_df.name_unusual_tokens.apply(\n", - " lambda x: \" \".join(sorted(set(x.split()))) if pd.notnull(x) else x\n", - " )\n", - " if secondary_col:\n", - " clean_df[\n", - " \"secondary_name_unusual_tokens\"\n", - " ] = clean_df.secondary_name_unusual_tokens.apply(\n", - " lambda x: \" \".join(sorted(set(x.split()))) if pd.notnull(x) else x\n", - " )\n", - "\n", - " clean_df[\"names_tokens_stopwords\"] = clean_df.names_tokens_stopwords.apply(\n", - " lambda x: \" \".join(set(x))\n", - " )\n", - "\n", - " # Get HEAD and TAIL characters\n", - " # For blocking rules\n", - " clean_df[\"name_unusual_tokens_first5\"] = clean_df.name_unusual_tokens.str[:5]\n", - " clean_df[\"name_unusual_tokens_last5\"] = clean_df.name_unusual_tokens.str[-5:]\n", - "\n", - " return clean_df" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/engineering/WL_compatibility-tests.ipynb b/notebooks/engineering/WL_compatibility-tests.ipynb deleted file mode 100644 index e982b99..0000000 --- a/notebooks/engineering/WL_compatibility-tests.ipynb +++ /dev/null @@ -1,306 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'1.30.0'" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "'1.3.5'" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from cmf.models import utils as mu\n", - "from sklearn.datasets import load_iris\n", - "\n", - "import mlflow\n", - "mlflow.__version__\n", - "\n", - "import pandas as pd \n", - "pd.__version__" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "end_run() got an unexpected keyword argument 'run_name'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmlflow\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mend_run\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrun_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mml2.4_test\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mTypeError\u001b[0m: end_run() got an unexpected keyword argument 'run_name'" - ] - } - ], - "source": [ - "mlflow.end_run(run_name=\"ml2.4_test\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - }, - { - "ename": "Exception", - "evalue": "Run with UUID 6022453812504d7ba86b5f1a63ff71a8 is already active. To start a new run, first end the current run with mlflow.end_run(). To start a nested run, call start_run with nested=True", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[6], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m mlflow\u001b[38;5;241m.\u001b[39mset_experiment(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDefault\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 2\u001b[0m \u001b[43mmlflow\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstart_run\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrun_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mml2.4_test\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdescription\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mTesting ML Flow 2.4\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m mlflow\u001b[38;5;241m.\u001b[39mlog_text(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFoo bar\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m mlflow\u001b[38;5;241m.\u001b[39mlog_metric(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmetric_1\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m1\u001b[39m)\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/mlflow/tracking/fluent.py:271\u001b[0m, in \u001b[0;36mstart_run\u001b[0;34m(run_id, experiment_id, run_name, nested, tags, description)\u001b[0m\n\u001b[1;32m 269\u001b[0m experiment_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(experiment_id) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(experiment_id, \u001b[38;5;28mint\u001b[39m) \u001b[38;5;28;01melse\u001b[39;00m experiment_id\n\u001b[1;32m 270\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(_active_run_stack) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m nested:\n\u001b[0;32m--> 271\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\n\u001b[1;32m 272\u001b[0m (\n\u001b[1;32m 273\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRun with UUID \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m is already active. To start a new run, first end the \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcurrent run with mlflow.end_run(). To start a nested \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 275\u001b[0m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrun, call start_run with nested=True\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 276\u001b[0m )\u001b[38;5;241m.\u001b[39mformat(_active_run_stack[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39minfo\u001b[38;5;241m.\u001b[39mrun_id)\n\u001b[1;32m 277\u001b[0m )\n\u001b[1;32m 278\u001b[0m client \u001b[38;5;241m=\u001b[39m MlflowClient()\n\u001b[1;32m 279\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m run_id:\n", - "\u001b[0;31mException\u001b[0m: Run with UUID 6022453812504d7ba86b5f1a63ff71a8 is already active. To start a new run, first end the current run with mlflow.end_run(). To start a nested run, call start_run with nested=True" - ] - } - ], - "source": [ - "mlflow.set_experiment(\"Default\")\n", - "mlflow.start_run(run_name=\"ml2.4_test\", description=\"Testing ML Flow 2.4\")\n", - "mlflow.log_text(\"Foo bar\")\n", - "mlflow.log_metric(\"metric_1\", 1)\n", - "mlflow.log_param(\"my\", 'param')\n", - "mlflow.end_run()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'{\\n \"link_type\": \"link_and_dedupe\",\\n \"retain_matching_columns\": true,\\n \"retain_intermediate_'" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model = mlflow.artifacts.load_text(\"mlflow-artifacts:/0/18cde7eb6d6e42dfb7f4278f491e4ba2/artifacts/model/companies_matching_model.json\")\n", - "model[:100]" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
\n", - "
" - ], - "text/plain": [ - " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)\n", - "0 5.1 3.5 1.4 0.2\n", - "1 4.9 3.0 1.4 0.2\n", - "2 4.7 3.2 1.3 0.2" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "iris = load_iris()\n", - "iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)\n", - "iris_df.head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
\n", - "
" - ], - "text/plain": [ - " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)\n", - "0 5.1 3.5 1.4 0.2\n", - "1 4.9 3.0 1.4 0.2\n", - "2 4.7 3.2 1.3 0.2" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset: PandasDataset = mlflow.data.from_pandas(iris_df)\n", - "dataset.df.head(3)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/engineering/WL_dtypemap.ipynb b/notebooks/engineering/WL_dtypemap.ipynb deleted file mode 100644 index 45bedd5..0000000 --- a/notebooks/engineering/WL_dtypemap.ipynb +++ /dev/null @@ -1,295 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "312a4b24-da71-46da-99a0-00a204cb8dee", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import duckdb" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "abdab3b9-1159-4792-a012-85053ba46576", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xy
0b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00...a
1b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00...b
2b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00...c
\n", - "
" - ], - "text/plain": [ - " x y\n", - "0 b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00... a\n", - "1 b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00... b\n", - "2 b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00... c" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.DataFrame({\n", - " \"x\": [bytes(123), bytes(456), bytes(789)],\n", - " \"y\": [\"a\", \"b\", \"c\"]\n", - "})\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "4a714c57-f9cc-4696-bf63-9f3d1e71b0ab", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xy
0[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...a
1[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...b
\n", - "
" - ], - "text/plain": [ - " x y\n", - "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... a\n", - "1 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... b" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_2 = duckdb.sql(\"\"\"\n", - " select x, y\n", - " from df\n", - " where y in ('a', 'b')\n", - "\"\"\")\n", - "df_2.df()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "663e4155-5195-4f88-92a0-013f0f69bd05", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xy
0b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00...a
1b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00...b
\n", - "
" - ], - "text/plain": [ - " x y\n", - "0 b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00... a\n", - "1 b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00... b" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_2.arrow().to_pandas()" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "dc14f9a3-4f99-4abf-ac7c-27479cc52bbe", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
xy
0bytearray(b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x...a
1bytearray(b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x...b
\n", - "
" - ], - "text/plain": [ - " x y\n", - "0 bytearray(b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x... a\n", - "1 bytearray(b'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x... b" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_2.map(str)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/engineering/WL_duckdb-debug.ipynb b/notebooks/engineering/WL_duckdb-debug.ipynb deleted file mode 100644 index 2251017..0000000 --- a/notebooks/engineering/WL_duckdb-debug.ipynb +++ /dev/null @@ -1,601 +0,0 @@ -{ - "metadata": { - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16-final" - }, - "orig_nbformat": 2, - "kernelspec": { - "name": "python_defaultSpec_1687788418504", - "display_name": "Python 3.9.16 64-bit" - } - }, - "nbformat": 4, - "nbformat_minor": 2, - "cells": [ - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "from cmf.config import settings\n", - "from cmf.data import utils as du\n", - "\n", - "import logging\n", - "import duckdb\n", - "\n", - "import mlflow\n", - "from mlflow.entities import ViewType\n", - "\n", - "from splink.duckdb.linker import DuckDBLinker" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "'models:/companies_matching_model.json'" - }, - "metadata": {}, - "execution_count": 68 - } - ], - "source": [ - "x = f\"\"\"\n", - " models:/\n", - " companies_matching_model.json\n", - "\"\"\"\n", - "\n", - "\"\".join(line.strip() for line in x.splitlines())" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [], - "source": [ - "query = 'attribute.status = FINISHED'\n", - "\n", - "x = mlflow.search_runs(\n", - " experiment_ids=mlflow.get_experiment_by_name(\n", - " 'Default'\n", - " ).experiment_id\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "0 True\n1 False\n2 True\n3 True\n4 False\n5 False\n6 False\n7 False\n8 False\n9 False\n10 False\n11 False\n12 False\n13 False\n14 False\n15 True\n16 True\n17 True\n18 True\n19 True\n20 True\n21 True\nName: status, dtype: bool" - }, - "metadata": {}, - "execution_count": 55 - } - ], - "source": [ - "x.status == 'FINISHED'" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "'a325a3cb39d443e2a734e128ee7105b7'" - }, - "metadata": {}, - "execution_count": 60 - } - ], - "source": [ - "x[(x.end_time == max(x.end_time)) & (x.status == 'FINISHED')].run_id[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "Index(['run_id', 'experiment_id', 'status', 'artifact_uri', 'start_time',\n 'end_time', 'metrics.metric_1', 'metrics.metric_2', 'metrics.score',\n 'metrics.r2', 'metrics.rmse', 'metrics.mae', 'params.param_1',\n 'params.random_state', 'params.n_estimators', 'params.my',\n 'params.alpha', 'params.l1_ratio', 'tags.git_hash',\n 'tags.mlflow.runName', 'tags.mlflow.source.name', 'tags.mlflow.user',\n 'tags.mlflow.source.type', 'tags.dev', 'tags.mlflow.source.git.commit',\n 'tags.mlflow.note.content', 'tags.sample_tag',\n 'tags.mlflow.log-model.history'],\n dtype='object')" - }, - "metadata": {}, - "execution_count": 46 - } - ], - "source": [ - "x.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "[,\n ]" - }, - "metadata": {}, - "execution_count": 32 - } - ], - "source": [ - "[exp for exp in mlflow.search_experiments() where experiment_id]\n", - "\n", - "mlflow.get_experiment_by_name" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "input_dir = \"company-matching__06-26-23_11-40-51\"\n", - "connection = duckdb.connect()\n", - "data = du.build_alias_path_dict(input_dir)\n", - "\n", - "linker = DuckDBLinker(\n", - " list(data.values()),\n", - " settings_dict=settings,\n", - " connection=connection,\n", - " input_table_aliases=list(data.keys()),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "353960" - }, - "metadata": {}, - "execution_count": 27 - } - ], - "source": [ - "x = duckdb.sql(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " read_parquet({list(data.values())})\n", - "\"\"\").fetchall()[0][0]\n", - "\n", - "x" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "error", - "ename": "ParserException", - "evalue": "Parser Error: syntax error at or near \"table\"\nLINE 5: table\n ^", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mParserException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[22], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m table \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 2\u001b[0m duckdb\u001b[38;5;241m.\u001b[39mread_parquet(data[table])\n\u001b[0;32m----> 3\u001b[0m \u001b[43mduckdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;43m select\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;43m count(*)\u001b[39;49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124;43m from\u001b[39;49m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124;43m table\u001b[39;49m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;124;43m \u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mParserException\u001b[0m: Parser Error: syntax error at or near \"table\"\nLINE 5: table\n ^" - ] - } - ], - "source": [ - "for table in data.keys():\n", - " duckdb.read_parquet(data[table])\n", - " duckdb.sql(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " table\n", - " \"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "metadata": {}, - "outputs": [ - { - "output_type": "error", - "ename": "AttributeError", - "evalue": "'CompanyMatchingDatasets' object has no attribute 'logger'", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[87], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# logger = logging.getLogger(__name__)\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m datasets \u001b[38;5;241m=\u001b[39m \u001b[43mCompanyMatchingDatasets\u001b[49m\u001b[43m(\u001b[49m\u001b[43msample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m100\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/company_matching/src/data/datasets.py:12\u001b[0m, in \u001b[0;36mCompanyMatchingDatasets.__init__\u001b[0;34m(self, sample)\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, sample: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdatasets_and_readfuncs \u001b[38;5;241m=\u001b[39m {\n\u001b[0;32m---> 12\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompanieshouse\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompanies\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcomp_house_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43msample\u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 13\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdit\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata_hub__companies\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_hub_read(sample),\n\u001b[1;32m 14\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhmrc\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrade__exporters\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhmrc_exporters_read(sample),\n\u001b[1;32m 15\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdit\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexport_wins__wins_dataset\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexport_wins_read(sample),\n\u001b[1;32m 16\u001b[0m }\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdatasets_and_readfuncs_clean \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconnection \u001b[38;5;241m=\u001b[39m duckdb\u001b[38;5;241m.\u001b[39mconnect()\n", - "File \u001b[0;32m~/company_matching/src/data/datasets.py:135\u001b[0m, in \u001b[0;36mCompanyMatchingDatasets.comp_house_read\u001b[0;34m(self, sample)\u001b[0m\n\u001b[1;32m 127\u001b[0m query \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;124m select \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcols\u001b[38;5;132;01m}\u001b[39;00m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;124m from \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdsname\u001b[38;5;132;01m}\u001b[39;00m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlimit\u001b[38;5;132;01m}\u001b[39;00m\n\u001b[1;32m 131\u001b[0m \u001b[38;5;124m\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 133\u001b[0m df_ch \u001b[38;5;241m=\u001b[39m du\u001b[38;5;241m.\u001b[39mquery(sql\u001b[38;5;241m=\u001b[39mquery)\n\u001b[0;32m--> 135\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlogger\u001b[49m\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m 136\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_generate_data_log(\n\u001b[1;32m 137\u001b[0m df_ch,\n\u001b[1;32m 138\u001b[0m dsname,\n\u001b[1;32m 139\u001b[0m sample\n\u001b[1;32m 140\u001b[0m )\n\u001b[1;32m 141\u001b[0m )\n\u001b[1;32m 143\u001b[0m df_ch_clean \u001b[38;5;241m=\u001b[39m clean_raw_data(df_ch)\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlogger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdsname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m cleaned\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mAttributeError\u001b[0m: 'CompanyMatchingDatasets' object has no attribute 'logger'" - ] - } - ], - "source": [ - "# logger = logging.getLogger(__name__)\n", - "datasets = CompanyMatchingDatasets(sample=100)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "Index(['unique_id', 'company_number', 'company_name', 'secondary_names',\n 'postcode'],\n dtype='object')" - }, - "metadata": {}, - "execution_count": 62 - }, - { - "output_type": "execute_result", - "data": { - "text/plain": "Index(['unique_id', 'company_number', 'company_name', 'secondary_names',\n 'postcode'],\n dtype='object')" - }, - "metadata": {}, - "execution_count": 62 - }, - { - "output_type": "execute_result", - "data": { - "text/plain": "Index(['unique_id', 'company_number', 'company_name', 'secondary_names',\n 'postcode'],\n dtype='object')" - }, - "metadata": {}, - "execution_count": 62 - }, - { - "output_type": "execute_result", - "data": { - "text/plain": "Index(['unique_id', 'company_number', 'company_name', 'secondary_names',\n 'postcode'],\n dtype='object')" - }, - "metadata": {}, - "execution_count": 62 - } - ], - "source": [ - "datasets.datasets_and_readfuncs_clean['companieshouse_companies'].columns\n", - "datasets.datasets_and_readfuncs_clean['dit_data_hub__companies'].columns\n", - "datasets.datasets_and_readfuncs_clean['hmrc_trade__exporters'].columns\n", - "datasets.datasets_and_readfuncs_clean['dit_export_wins__wins_dataset'].columns" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": "Probability two random records match is estimated to be 3.58e-05.\nThis means that amongst all possible pairwise record comparisons, one in 27,930.00 are expected to match. With 79,800 total possible comparisons, we expect a total of around 2.86 matching pairs\n" - } - ], - "source": [ - "# linker = DuckDBLinker(\n", - "# list(datasets.datasets_and_readfuncs_clean.keys()),\n", - "# settings_dict = settings,\n", - "# connection = datasets.connection\n", - "# )\n", - "linker = datasets.linker(settings)\n", - "linker.estimate_probability_two_random_records_match(\n", - " \"l.name_unusual_tokens = r.name_unusual_tokens\",\n", - " recall=0.7,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "pandas.core.frame.DataFrame" - }, - "metadata": {}, - "execution_count": 10 - } - ], - "source": [ - "datasets.data[0].__class__" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "['self.companieshouse_companies',\n 'self.dit_data_hub__companies',\n 'self.hmrc_trade__exporters',\n 'self.dit_export_wins__wins_dataset']" - }, - "metadata": {}, - "execution_count": 12 - } - ], - "source": [ - "datasets.alias" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": " unique_id company_number company_name secondary_names \\\n0 08230106 08230106 BOURNE HILL STABLES LIMITED [TEN DELTA LIMITED] \n\n company_status account_category address_line_1 address_line_2 post_town \\\n0 Active MICRO ENTITY 9 CHEAM ROAD EPSOM \n\n county country postcode \\\n0 ENGLAND KT17 1SP \n\n sic_code_1 sic_code_2 sic_code_3 \\\n0 01629 - Support activities for animal producti... \n\n sic_code_4 \n0 ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
unique_idcompany_numbercompany_namesecondary_namescompany_statusaccount_categoryaddress_line_1address_line_2post_towncountycountrypostcodesic_code_1sic_code_2sic_code_3sic_code_4
00823010608230106BOURNE HILL STABLES LIMITED[TEN DELTA LIMITED]ActiveMICRO ENTITY9 CHEAM ROADEPSOMENGLANDKT17 1SP01629 - Support activities for animal producti...
\n
" - }, - "metadata": {}, - "execution_count": 15 - } - ], - "source": [ - "datasets.companieshouse_companies.head(1)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "output_type": "error", - "ename": "CatalogException", - "evalue": "Catalog Error: Table with name companieshouse_companies does not exist!\nDid you mean \"temp.information_schema.columns\"?", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mCatalogException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mduckdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124;43m select\u001b[39;49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;43m *\u001b[39;49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;43m from\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;43m datasets.companieshouse_companies\u001b[39;49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124;43m limit 1\u001b[39;49m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mCatalogException\u001b[0m: Catalog Error: Table with name companieshouse_companies does not exist!\nDid you mean \"temp.information_schema.columns\"?" - ] - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " *\n", - " from\n", - " datasets.companieshouse_companies\n", - " limit 1\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "con = duckdb.connect()" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "" - }, - "metadata": {}, - "execution_count": 52 - }, - { - "output_type": "execute_result", - "data": { - "text/plain": "" - }, - "metadata": {}, - "execution_count": 52 - }, - { - "output_type": "execute_result", - "data": { - "text/plain": "" - }, - "metadata": {}, - "execution_count": 52 - }, - { - "output_type": "execute_result", - "data": { - "text/plain": "" - }, - "metadata": {}, - "execution_count": 52 - } - ], - "source": [ - "# Load data\n", - "datasets = CompanyMatchingDatasets(sample=100)\n", - "\n", - "# Instantiate linker\n", - "# linker = datasets.linker(settings)\n", - "\n", - "table_names = []\n", - "for table in datasets.datasets_and_readfuncs_clean:\n", - " # table_names.append(f\"{table}\")\n", - " con.register(\n", - " f\"{table}\", \n", - " datasets.datasets_and_readfuncs_clean[table]\n", - " )\n" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "['companieshouse_companies',\n 'dit_data_hub__companies',\n 'hmrc_trade__exporters',\n 'dit_export_wins__wins_dataset']" - }, - "metadata": {}, - "execution_count": 53 - } - ], - "source": [ - "list(datasets.datasets_and_readfuncs_clean.keys())" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Instantiate linker\n", - "linker = DuckDBLinker(\n", - " list(datasets.datasets_and_readfuncs_clean.keys()),\n", - " settings_dict = settings,\n", - " connection = con\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "output_type": "error", - "ename": "SplinkException", - "evalue": "Error executing the following sql for table `__splink__df_count_cumulative_blocks`(__splink__df_count_cumulative_blocks_4c42918e5):\nCREATE TABLE __splink__df_count_cumulative_blocks_4c42918e5 AS\n(\n WITH __splink__df_concat AS (\n SELECT\n *\n FROM __splink__df_concat_9f1d2ff43\n ), __splink__df_blocked_data AS (\n SELECT\n \"l\".\"source_dataset\" AS \"source_dataset_l\",\n \"r\".\"source_dataset\" AS \"source_dataset_r\",\n \"l\".\"unique_id\" AS \"unique_id_l\",\n \"r\".\"unique_id\" AS \"unique_id_r\",\n \"l\".\"comp_num_clean\" AS \"comp_num_clean_l\",\n \"r\".\"comp_num_clean\" AS \"comp_num_clean_r\",\n \"l\".\"name_unusual_tokens\" AS \"name_unusual_tokens_l\",\n \"r\".\"name_unusual_tokens\" AS \"name_unusual_tokens_r\",\n \"l\".\"postcode\" AS \"postcode_l\",\n \"r\".\"postcode\" AS \"postcode_r\",\n \"l\".\"secondary_name_unusual_tokens\" AS \"secondary_name_unusual_tokens_l\",\n \"r\".\"secondary_name_unusual_tokens\" AS \"secondary_name_unusual_tokens_r\",\n '0' AS match_key\n FROM __splink__df_concat AS l\n INNER JOIN __splink__df_concat AS r\n ON l.name_unusual_tokens = r.name_unusual_tokens\n WHERE\n l.\"source_dataset\" || '-__-' || l.\"unique_id\" < r.\"source_dataset\" || '-__-' || r.\"unique_id\"\n )\n SELECT\n COUNT(*) AS row_count,\n match_key\n FROM __splink__df_blocked_data\n GROUP BY\n match_key\n ORDER BY\n CAST(match_key AS INT)\n)\n\nError was: Binder Error: Values list \"l\" does not have a column named \"name_unusual_tokens\"", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mBinderException\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/linker.py:637\u001b[0m, in \u001b[0;36mLinker._log_and_run_sql_execution\u001b[0;34m(self, final_sql, templated_name, physical_name)\u001b[0m\n\u001b[1;32m 636\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 637\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_sql_execution\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal_sql\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtemplated_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mphysical_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 638\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 639\u001b[0m \u001b[38;5;66;03m# Parse our SQL through sqlglot to pretty print\u001b[39;00m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/duckdb/linker.py:221\u001b[0m, in \u001b[0;36mDuckDBLinker._run_sql_execution\u001b[0;34m(self, final_sql, templated_name, physical_name)\u001b[0m\n\u001b[1;32m 220\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_run_sql_execution\u001b[39m(\u001b[38;5;28mself\u001b[39m, final_sql, templated_name, physical_name):\n\u001b[0;32m--> 221\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_con\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal_sql\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mBinderException\u001b[0m: Binder Error: Values list \"l\" does not have a column named \"name_unusual_tokens\"", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mSplinkException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[56], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mlinker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mestimate_probability_two_random_records_match\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43ml.name_unusual_tokens = r.name_unusual_tokens\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mrecall\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.7\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/linker.py:3088\u001b[0m, in \u001b[0;36mLinker.estimate_probability_two_random_records_match\u001b[0;34m(self, deterministic_matching_rules, recall)\u001b[0m\n\u001b[1;32m 3085\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(deterministic_matching_rules, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 3086\u001b[0m deterministic_matching_rules \u001b[38;5;241m=\u001b[39m [deterministic_matching_rules]\n\u001b[0;32m-> 3088\u001b[0m records \u001b[38;5;241m=\u001b[39m \u001b[43mcumulative_comparisons_generated_by_blocking_rules\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3089\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3090\u001b[0m \u001b[43m \u001b[49m\u001b[43mdeterministic_matching_rules\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3091\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3093\u001b[0m summary_record \u001b[38;5;241m=\u001b[39m records[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m 3094\u001b[0m num_observed_matches \u001b[38;5;241m=\u001b[39m summary_record[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcumulative_rows\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/analyse_blocking.py:100\u001b[0m, in \u001b[0;36mcumulative_comparisons_generated_by_blocking_rules\u001b[0;34m(linker, blocking_rules, output_chart)\u001b[0m\n\u001b[1;32m 91\u001b[0m sql \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;124m select\u001b[39m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;124m count(*) as row_count,\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;124m order by cast(match_key as int) asc\u001b[39m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;124m\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 99\u001b[0m linker\u001b[38;5;241m.\u001b[39m_enqueue_sql(sql, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__splink__df_count_cumulative_blocks\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 100\u001b[0m cumulative_blocking_rule_count \u001b[38;5;241m=\u001b[39m \u001b[43mlinker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execute_sql_pipeline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mconcat\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 101\u001b[0m br_n \u001b[38;5;241m=\u001b[39m cumulative_blocking_rule_count\u001b[38;5;241m.\u001b[39mas_pandas_dataframe()\n\u001b[1;32m 102\u001b[0m cumulative_blocking_rule_count\u001b[38;5;241m.\u001b[39mdrop_table_from_database()\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/linker.py:579\u001b[0m, in \u001b[0;36mLinker._execute_sql_pipeline\u001b[0;34m(self, input_dataframes, materialise_as_hash, use_cache)\u001b[0m\n\u001b[1;32m 572\u001b[0m dataframe \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sql_to_splink_dataframe_checking_cache(\n\u001b[1;32m 573\u001b[0m sql_gen,\n\u001b[1;32m 574\u001b[0m output_tablename_templated,\n\u001b[1;32m 575\u001b[0m materialise_as_hash,\n\u001b[1;32m 576\u001b[0m use_cache,\n\u001b[1;32m 577\u001b[0m )\n\u001b[1;32m 578\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 579\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 580\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 581\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_pipeline\u001b[38;5;241m.\u001b[39mreset()\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/linker.py:572\u001b[0m, in \u001b[0;36mLinker._execute_sql_pipeline\u001b[0;34m(self, input_dataframes, materialise_as_hash, use_cache)\u001b[0m\n\u001b[1;32m 569\u001b[0m output_tablename_templated \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_pipeline\u001b[38;5;241m.\u001b[39mqueue[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;241m.\u001b[39moutput_table_name\n\u001b[1;32m 571\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 572\u001b[0m dataframe \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sql_to_splink_dataframe_checking_cache\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 573\u001b[0m \u001b[43m \u001b[49m\u001b[43msql_gen\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 574\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_tablename_templated\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 575\u001b[0m \u001b[43m \u001b[49m\u001b[43mmaterialise_as_hash\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 576\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 577\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 578\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 579\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/linker.py:809\u001b[0m, in \u001b[0;36mLinker._sql_to_splink_dataframe_checking_cache\u001b[0;34m(self, sql, output_tablename_templated, materialise_as_hash, use_cache)\u001b[0m\n\u001b[1;32m 806\u001b[0m \u001b[38;5;28mprint\u001b[39m(sql)\n\u001b[1;32m 808\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m materialise_as_hash:\n\u001b[0;32m--> 809\u001b[0m splink_dataframe \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execute_sql_against_backend\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 810\u001b[0m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput_tablename_templated\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtable_name_hash\u001b[49m\n\u001b[1;32m 811\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 812\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 813\u001b[0m splink_dataframe \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_execute_sql_against_backend(\n\u001b[1;32m 814\u001b[0m sql,\n\u001b[1;32m 815\u001b[0m output_tablename_templated,\n\u001b[1;32m 816\u001b[0m output_tablename_templated,\n\u001b[1;32m 817\u001b[0m )\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/duckdb/linker.py:216\u001b[0m, in \u001b[0;36mDuckDBLinker._execute_sql_against_backend\u001b[0;34m(self, sql, templated_name, physical_name)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_delete_table_from_database(physical_name)\n\u001b[1;32m 211\u001b[0m sql \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 212\u001b[0m \u001b[38;5;124mCREATE TABLE \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mphysical_name\u001b[38;5;132;01m}\u001b[39;00m\n\u001b[1;32m 213\u001b[0m \u001b[38;5;124mAS\u001b[39m\n\u001b[1;32m 214\u001b[0m \u001b[38;5;124m(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msql\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;124m\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[0;32m--> 216\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_log_and_run_sql_execution\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtemplated_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mphysical_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m DuckDBLinkerDataFrame(templated_name, physical_name, \u001b[38;5;28mself\u001b[39m)\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/linker.py:649\u001b[0m, in \u001b[0;36mLinker._log_and_run_sql_execution\u001b[0;34m(self, final_sql, templated_name, physical_name)\u001b[0m\n\u001b[1;32m 646\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[1;32m 647\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m--> 649\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SplinkException(\n\u001b[1;32m 650\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError executing the following sql for table \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 651\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtemplated_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m`(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mphysical_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m):\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mfinal_sql\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 652\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mError was: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 653\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n", - "\u001b[0;31mSplinkException\u001b[0m: Error executing the following sql for table `__splink__df_count_cumulative_blocks`(__splink__df_count_cumulative_blocks_4c42918e5):\nCREATE TABLE __splink__df_count_cumulative_blocks_4c42918e5 AS\n(\n WITH __splink__df_concat AS (\n SELECT\n *\n FROM __splink__df_concat_9f1d2ff43\n ), __splink__df_blocked_data AS (\n SELECT\n \"l\".\"source_dataset\" AS \"source_dataset_l\",\n \"r\".\"source_dataset\" AS \"source_dataset_r\",\n \"l\".\"unique_id\" AS \"unique_id_l\",\n \"r\".\"unique_id\" AS \"unique_id_r\",\n \"l\".\"comp_num_clean\" AS \"comp_num_clean_l\",\n \"r\".\"comp_num_clean\" AS \"comp_num_clean_r\",\n \"l\".\"name_unusual_tokens\" AS \"name_unusual_tokens_l\",\n \"r\".\"name_unusual_tokens\" AS \"name_unusual_tokens_r\",\n \"l\".\"postcode\" AS \"postcode_l\",\n \"r\".\"postcode\" AS \"postcode_r\",\n \"l\".\"secondary_name_unusual_tokens\" AS \"secondary_name_unusual_tokens_l\",\n \"r\".\"secondary_name_unusual_tokens\" AS \"secondary_name_unusual_tokens_r\",\n '0' AS match_key\n FROM __splink__df_concat AS l\n INNER JOIN __splink__df_concat AS r\n ON l.name_unusual_tokens = r.name_unusual_tokens\n WHERE\n l.\"source_dataset\" || '-__-' || l.\"unique_id\" < r.\"source_dataset\" || '-__-' || r.\"unique_id\"\n )\n SELECT\n COUNT(*) AS row_count,\n match_key\n FROM __splink__df_blocked_data\n GROUP BY\n match_key\n ORDER BY\n CAST(match_key AS INT)\n)\n\nError was: Binder Error: Values list \"l\" does not have a column named \"name_unusual_tokens\"" - ] - } - ], - "source": [ - "linker.estimate_probability_two_random_records_match(\n", - " \"l.name_unusual_tokens = r.name_unusual_tokens\",\n", - " recall=0.7,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "" - }, - "metadata": {}, - "execution_count": 57 - } - ], - "source": [ - "con.execute(\"\"\"\n", - " select\n", - " *\n", - " from\n", - " companieshouse_companies\n", - " limit 1\n", - "\"\"\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "┌───────────┬────────────────┬──────────────────────┬─────────────────┬───┬────────────┬────────────┬────────────┐\n│ unique_id │ company_number │ company_name │ secondary_names │ … │ sic_code_2 │ sic_code_3 │ sic_code_4 │\n│ varchar │ varchar │ varchar │ varchar[] │ │ varchar │ varchar │ varchar │\n├───────────┼────────────────┼──────────────────────┼─────────────────┼───┼────────────┼────────────┼────────────┤\n│ 03232349 │ 03232349 │ CLASSIC MARQUEES L… │ [] │ … │ │ │ │\n├───────────┴────────────────┴──────────────────────┴─────────────────┴───┴────────────┴────────────┴────────────┤\n│ 1 rows 16 columns (7 shown) │\n└────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - }, - "metadata": {}, - "execution_count": 33 - } - ], - "source": [ - "duckdb.sql(f\"\"\"\n", - " select\n", - " *\n", - " from\n", - " {table_names[0]}\n", - " limit 1\n", - "\"\"\")" - ] - } - ] -} \ No newline at end of file diff --git a/notebooks/engineering/WL_exceptions.ipynb b/notebooks/engineering/WL_exceptions.ipynb deleted file mode 100644 index dd125e3..0000000 --- a/notebooks/engineering/WL_exceptions.ipynb +++ /dev/null @@ -1,50 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 13, - "id": "261f0288-56de-427d-b683-f921be3f94a1", - "metadata": {}, - "outputs": [ - { - "ename": "CMFSourceError", - "evalue": "Data doesn't exist in Company Matching Framework.\nTable: models\nData: naive_deduper", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mCMFSourceError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[13], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcmf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdata\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresults\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CMFSourceError\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcmf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Models\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CMFSourceError(source\u001b[38;5;241m=\u001b[39mModels, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnaive_deduper\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mCMFSourceError\u001b[0m: Data doesn't exist in Company Matching Framework.\nTable: models\nData: naive_deduper" - ] - } - ], - "source": [ - "from cmf.data.results import CMFSourceError\n", - "from cmf.data import Models\n", - "\n", - "raise CMFSourceError(source=Models, data=\"naive_deduper\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/engineering/WL_pred-to-prod.ipynb b/notebooks/engineering/WL_pred-to-prod.ipynb deleted file mode 100644 index de8659c..0000000 --- a/notebooks/engineering/WL_pred-to-prod.ipynb +++ /dev/null @@ -1,477 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "id": "e2c98a20-7952-46df-bcb4-b79bce3081e7", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "b280e4d8-cb84-46ec-9413-3573a9291cc0", - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow\n", - "import duckdb\n", - "import json\n", - "from pathlib import Path\n", - "import pandas as pd\n", - "\n", - "from splink.duckdb.linker import DuckDBLinker\n", - "\n", - "from cmf.data import utils as du\n", - "import cmf.locations as loc\n", - "from cmf.config import settings, datasets\n", - "\n", - "DATA_FULL = du.build_alias_path_dict(Path(loc.DATA_SUBDIR['processed']) / 'company-matching__full')\n", - "DATA_100K = du.build_alias_path_dict(Path(loc.DATA_SUBDIR['processed']) / 'company-matching__06-26-23_11-40-51')\n", - "PRED_PATH = Path(loc.DATA_SUBDIR['processed']) / 'company-matching__full' / 'predictions.parquet'" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "b6672ef2-ddbc-442c-94f9-c03e6e42f84a", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:1410: RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to \"sqlalchemy<2.0\". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n", - " meta = MetaData(self.connectable, schema=schema)\n" - ] - } - ], - "source": [ - "df_ch = du.get_company_data(\n", - " cols=datasets['\"companieshouse\".\"companies\"'][\"cols\"],\n", - " dataset='\"companieshouse\".\"companies\"',\n", - " where=datasets['\"companieshouse\".\"companies\"'][\"where\"],\n", - " sample=100_000,\n", - ")\n", - "df_dh = du.get_company_data(\n", - " cols=datasets['\"dit\".\"data_hub__companies\"'][\"cols\"],\n", - " dataset='\"dit\".\"data_hub__companies\"',\n", - " where=datasets['\"dit\".\"data_hub__companies\"'][\"where\"],\n", - " sample=100_000,\n", - ")\n", - "df_ex = du.get_company_data(\n", - " cols=datasets['\"hmrc\".\"trade__exporters\"'][\"cols\"],\n", - " dataset='\"hmrc\".\"trade__exporters\"',\n", - " where=datasets['\"hmrc\".\"trade__exporters\"'][\"where\"],\n", - " sample=100_000,\n", - ")\n", - "df_ew = du.get_company_data(\n", - " cols=datasets['\"dit\".\"export_wins__wins_dataset\"'][\"cols\"],\n", - " dataset='\"dit\".\"export_wins__wins_dataset\"',\n", - " where=datasets['\"dit\".\"export_wins__wins_dataset\"'][\"where\"],\n", - " sample=100_000,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "cbdb4ce1-e3fc-4fbf-afa8-8b9a4da4d6c3", - "metadata": {}, - "outputs": [], - "source": [ - "connection = duckdb.connect()\n", - "\n", - "connection.query(f\"\"\"\n", - " create table companieshouse_companies as select * from df_ch;\n", - " create table dit_data_hub__companies as select * from df_dh;\n", - " create table hmrc_trade__exporters as select * from df_ex;\n", - " create table dit_export_wins__wins_dataset as select * from df_ew;\n", - "\"\"\")\n", - "\n", - "json_raw = mlflow.artifacts.load_text(\n", - " artifact_uri=\"runs:/22ce217706c54650ac34f59cb6a45960/model/companies_matching_model.json\"\n", - ")\n", - "json_settings = json.loads(json_raw)\n", - "\n", - "linker = DuckDBLinker(\n", - " list(DATA_100K.values()),\n", - " settings_dict=settings,\n", - " connection=connection,\n", - " input_table_aliases=list(DATA_100K.keys()),\n", - ")\n", - "linker.load_model(json_settings)" - ] - }, - { - "cell_type": "markdown", - "id": "dc8fd7d1-f14c-493f-b11e-ca0402eedb68", - "metadata": {}, - "source": [ - "I've had a lot of problems with the clsutering parts of Splink, but I wondered if I could use the predictions frame similarly to the lookup I made before.\n", - "\n", - "This notebook is to test that out." - ] - }, - { - "cell_type": "markdown", - "id": "d651fab8-340f-42fc-9659-d0a1cc9d12ae", - "metadata": {}, - "source": [ - "## Production with predictions\n", - "\n", - "Using only the prediction dataframe we need:\n", - "\n", - "* (Dupes) For a given source and list of targets, all IDs that need to be joined on both sides, where the highest pairwise match prediction is the ONLY one that matches \n", - "* (Deduped) As above, PLUS only the top match returned between each pair of tables\n", - "\n", - "Don't forget, because we link and dedupe we also have INTERNAL matches at play." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "447b5bd9-b8cb-4cb6-915d-45353a8b98f5", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - " -- WARNING --\n", - "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. To produce predictions the following untrained trained parameters will use default values.\n", - "Comparison: 'comp_num_clean':\n", - " u values not fully trained\n" - ] - } - ], - "source": [ - "predictions = linker.predict(threshold_match_probability=.7)\n", - "\n", - "connection.query(f\"\"\"\n", - " create table predictions as select * from { predictions.physical_name };\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "6be8c239-34eb-4d3c-82bb-d63d216d9ee5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌────────────────────┬────────────────────┬───┬──────────────────────┬──────────────────────┬───────────┐\n", - "│ match_weight │ match_probability │ … │ unique_id_l │ unique_id_r │ match_key │\n", - "│ double │ double │ │ varchar │ varchar │ varchar │\n", - "├────────────────────┼────────────────────┼───┼──────────────────────┼──────────────────────┼───────────┤\n", - "│ 12.844252836761338 │ 0.9998640323566512 │ … │ 99149ffa-ab32-497b… │ 511cde27-23ef-4a17… │ 0 │\n", - "│ 10.674327835319026 │ 0.9993884366398315 │ … │ 6f61b183-e035-4961… │ 6c9055b9-75d8-4bf8… │ 0 │\n", - "│ 12.036897914703735 │ 0.9997620808861155 │ … │ 35b26d27-7e2d-e611… │ fbf48cd3-18fc-420f… │ 0 │\n", - "│ 15.737393039198157 │ 0.999981695212484 │ … │ 27c313e0-ec36-e711… │ 480ee73a-e97d-e311… │ 0 │\n", - "│ 11.036897914703735 │ 0.999524274956312 │ … │ f1984abb-a098-e211… │ e18fdc4d-0c61-45b3… │ 0 │\n", - "│ 13.259290336040182 │ 0.9998980208010205 │ … │ f31b2bd3-a098-e211… │ 04dcd65c-c2b9-4bb1… │ 0 │\n", - "│ 13.844252836761338 │ 0.9999320115562114 │ … │ 4610ecd2-38ba-484d… │ b8f9c338-3a2e-495b… │ 0 │\n", - "│ 13.844252836761338 │ 0.9999320115562114 │ … │ a70f03bf-ae95-4d97… │ 58c592a2-2f80-4f3d… │ 0 │\n", - "│ 12.674327835319026 │ 0.999847039000951 │ … │ ee33e245-6463-4f39… │ bf94e749-7d9e-4e02… │ 0 │\n", - "│ 13.259290336040182 │ 0.9998980208010205 │ … │ 00e37670-a084-e611… │ 87971a36-4e20-45db… │ 0 │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ 7.522324741873977 │ 0.9945899821736477 │ … │ b6688756-5241-4e1f… │ cbb08ca8-86dc-45e9… │ 0 │\n", - "│ 7.522324741873977 │ 0.9945899821736477 │ … │ 6688f8b9-8b43-40bf… │ cbb08ca8-86dc-45e9… │ 0 │\n", - "│ 8.08936533459787 │ 0.9963418059262881 │ … │ 3975306e-d671-44d3… │ e2362a0d-e306-4755… │ 0 │\n", - "│ 9.384821218124042 │ 0.9985063867538949 │ … │ 3e5d8284-b5d8-438a… │ d6722138-503c-4442… │ 0 │\n", - "│ 8.229542992646131 │ 0.9966794028415534 │ … │ 22b611a1-dd2e-45f2… │ 496fbc44-d60e-4216… │ 0 │\n", - "│ 8.259290336040182 │ 0.9967469496801004 │ … │ 198f547e-3bd1-498c… │ 25234e17-1d26-4ea7… │ 0 │\n", - "│ 8.866972913261423 │ 0.9978627958288209 │ … │ 1c08c4b1-5d1e-4c75… │ 538b30d7-29ff-4e9b… │ 0 │\n", - "│ 8.799858717402886 │ 0.9977612520426443 │ … │ 51cf4d5a-dc6d-4355… │ 9cf52085-109d-4574… │ 0 │\n", - "│ 8.55885061789909 │ 0.9973552834783452 │ … │ 0cb712f0-4ce6-4229… │ 362912cf-9367-455d… │ 0 │\n", - "│ 12.259290336040182 │ 0.9997960623994343 │ … │ 031ca56b-840d-443c… │ 52453073-a7c5-4e66… │ 0 │\n", - "├────────────────────┴────────────────────┴───┴──────────────────────┴──────────────────────┴───────────┤\n", - "│ ? rows (>9999 rows, 20 shown) 7 columns (5 shown) │\n", - "└───────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "connection.query(f\"\"\"\n", - " select *\n", - " from predictions\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "628c398c-3a92-4e12-8042-9b88b03cc02b", - "metadata": {}, - "source": [ - "## Production with clusters\n", - "\n", - "This is more or less lifted from WL_splink-test, with the exception that I've attached the raw data to the DuckDB to mimic the Postgres environment better.\n", - "\n", - "I don't think it's quite working as it was before -- the counts on dupe/dedupe come back suspiciously similar. I don't want to spend time fixing it when I think the future is predictions, so just be careful with the below." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "a993333b-cd5a-4cf2-ac2a-5a8c33f40c55", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - " -- WARNING --\n", - "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. To produce predictions the following untrained trained parameters will use default values.\n", - "Comparison: 'comp_num_clean':\n", - " u values not fully trained\n", - "Completed iteration 1, root rows count 27\n", - "Completed iteration 2, root rows count 0\n" - ] - } - ], - "source": [ - "predictions = linker.predict(threshold_match_probability=.7)\n", - "\n", - "clusters = linker.cluster_pairwise_predictions_at_threshold(\n", - " predictions,\n", - " threshold_match_probability=.7,\n", - " pairwise_formatting=True,\n", - " filter_pairwise_format_for_clusters=False,\n", - ")\n", - "\n", - "lookup = linker.query_sql(\n", - " f\"\"\"\n", - " select\n", - " source_dataset_l as source,\n", - " unique_id_l as source_id,\n", - " cluster_id_l as source_cluster,\n", - " source_dataset_r as target,\n", - " unique_id_r as target_id,\n", - " cluster_id_r as target_cluster,\n", - " match_probability\n", - " from\n", - " { clusters.physical_name }\n", - " union\n", - " select\n", - " source_dataset_r as source,\n", - " unique_id_r as source_id,\n", - " cluster_id_r as source_cluster,\n", - " source_dataset_l as target,\n", - " unique_id_l as target_id,\n", - " cluster_id_l as target_cluster,\n", - " match_probability\n", - " from\n", - " { clusters.physical_name }\n", - " \"\"\",\n", - " # output_type=\"splink_df\",\n", - ")\n", - "\n", - "connection.query(f\"\"\"\n", - " create table lookup as select * from lookup;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "78314ff2-3d1d-48b4-950e-414f8ba29fc3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(100000, 4)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "┌───────────┬───────────────────────────┬───────────────────────────┬─────────┐\n", - "│ unique_id │ ch_name │ dh_name │ ew_name │\n", - "│ varchar │ varchar │ varchar │ varchar │\n", - "├───────────┼───────────────────────────┼───────────────────────────┼─────────┤\n", - "│ 02453212 │ ST HELENS CHAMBER LIMITED │ ST HELENS CHAMBER LIMITED │ NULL │\n", - "│ 07343391 │ EMPOWER ENERGY LIMITED │ NULL │ NULL │\n", - "│ 07374749 │ AMBREY RISK LIMITED │ NULL │ NULL │\n", - "│ 11109773 │ IONIAN PELLO TECH LIMITED │ IONIAN PELLO TECH LIMITED │ NULL │\n", - "│ 03478491 │ PREMIER PITCHES LIMITED │ PREMIER PITCHES LIMITED │ NULL │\n", - "└───────────┴───────────────────────────┴───────────────────────────┴─────────┘" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "join_with_dupes = connection.sql(\"\"\"\n", - " select\n", - " ch.unique_id,\n", - " ch.company_name as ch_name,\n", - " dh.company_name as dh_name,\n", - " ew.company_name as ew_name\n", - " from (\n", - " select \n", - " *\n", - " from\n", - " lookup lookup\n", - " where\n", - " lookup.source = 'companieshouse_companies'\n", - " and lookup.target in (\n", - " 'dit_data_hub__companies',\n", - " 'dit_export_wins__wins_dataset'\n", - " )\n", - " ) lookup\n", - " right outer join companieshouse_companies ch on\n", - " lookup.source_id = ch.unique_id \n", - " and lookup.source = 'companieshouse_companies'\n", - " left join dit_data_hub__companies dh on\n", - " lookup.target_id = dh.unique_id \n", - " and lookup.target = 'dit_data_hub__companies'\n", - " left join dit_export_wins__wins_dataset ew on\n", - " lookup.target_id = ew.unique_id\n", - " and lookup.target = 'dit_export_wins__wins_dataset'\n", - "\"\"\")\n", - "\n", - "join_with_dupes.df().shape\n", - "connection.sql(\"select * from join_with_dupes limit 5\")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "29b3c758-833a-436d-accf-4d2c33ebb0bc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(100000, 4)" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "┌───────────┬────────────────────────────────────────────┬────────────────────────────────────────────┬─────────┐\n", - "│ unique_id │ ch_name │ dh_name │ ew_name │\n", - "│ varchar │ varchar │ varchar │ varchar │\n", - "├───────────┼────────────────────────────────────────────┼────────────────────────────────────────────┼─────────┤\n", - "│ 11109773 │ IONIAN PELLO TECH LIMITED │ IONIAN PELLO TECH LIMITED │ NULL │\n", - "│ 02453212 │ ST HELENS CHAMBER LIMITED │ ST HELENS CHAMBER LIMITED │ NULL │\n", - "│ 03478491 │ PREMIER PITCHES LIMITED │ PREMIER PITCHES LIMITED │ NULL │\n", - "│ 08435515 │ THE ROYAL BUCKINGHAMSHIRE HOSPITAL LIMITED │ THE ROYAL BUCKINGHAMSHIRE HOSPITAL LIMITED │ NULL │\n", - "│ 07343391 │ EMPOWER ENERGY LIMITED │ NULL │ NULL │\n", - "└───────────┴────────────────────────────────────────────┴────────────────────────────────────────────┴─────────┘" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "join_no_dupes = connection.sql(\"\"\"\n", - " select\n", - " ch.unique_id,\n", - " ch.company_name as ch_name,\n", - " dh.company_name as dh_name,\n", - " ew.company_name as ew_name\n", - " from (\n", - " select\n", - " source,\n", - " source_id,\n", - " array_agg(target) as target, \n", - " array_agg(target_id) as target_id\n", - " from (\n", - " select distinct on (\n", - " lookup.source_id, \n", - " lookup.target,\n", - " lookup.target_cluster\n", - " )\n", - " *\n", - " from\n", - " lookup lookup\n", - " where\n", - " lookup.source = 'companieshouse_companies'\n", - " and lookup.target in (\n", - " 'dit_data_hub__companies',\n", - " 'dit_export_wins__wins_dataset'\n", - " )\n", - " order by\n", - " lookup.source_id, \n", - " lookup.target,\n", - " lookup.target_cluster,\n", - " lookup.match_probability desc\n", - " ) lookup\n", - " where\n", - " lookup.source = 'companieshouse_companies'\n", - " and lookup.target in (\n", - " 'dit_data_hub__companies',\n", - " 'dit_export_wins__wins_dataset'\n", - " )\n", - " group by\n", - " source,\n", - " source_id\n", - " ) lookup\n", - " right join companieshouse_companies ch on\n", - " lookup.source_id = ch.unique_id \n", - " and lookup.source = 'companieshouse_companies'\n", - " left join dit_data_hub__companies dh on\n", - " array_has(lookup.target_id, dh.unique_id)\n", - " and array_has(lookup.target, 'dit_data_hub__companies')\n", - " left join dit_export_wins__wins_dataset ew on\n", - " array_has(lookup.target_id, ew.unique_id)\n", - " and array_has(lookup.target, 'dit_export_wins__wins_dataset')\n", - "\"\"\")\n", - "\n", - "join_no_dupes.df().shape\n", - "connection.sql(\"select * from join_no_dupes limit 5\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/engineering/WL_prob2clus.ipynb b/notebooks/engineering/WL_prob2clus.ipynb deleted file mode 100644 index b792220..0000000 --- a/notebooks/engineering/WL_prob2clus.ipynb +++ /dev/null @@ -1,2409 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "4f6f17b1-2052-4322-acfe-dd271846311f", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame, display\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "markdown", - "id": "3706608b-1690-48bb-b8aa-61ce1fb96a7d", - "metadata": {}, - "source": [ - "# Probabilities to cluster algorithm\n", - "\n", - "A notebook to hash out this algorithm and check it works.\n", - "\n", - "Will hopefully turn into a unit test too, hence CVSs into version control." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "2d75838e-9781-463e-ac09-3a9097fb630b", - "metadata": {}, - "outputs": [], - "source": [ - "from cmf import locations as loc\n", - "\n", - "import pandas as pd\n", - "import duckdb\n", - "from pathlib import Path" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "567ce495-eb33-4f2c-908a-b25db551293d", - "metadata": {}, - "outputs": [], - "source": [ - "clus = pd.read_csv(Path(loc.PROJECT_DIR, \"test\", \"clusters.csv\"))\n", - "prob = pd.read_csv(Path(loc.PROJECT_DIR, \"test\", \"probabilities.csv\"))\n", - "val = pd.read_csv(Path(loc.PROJECT_DIR, \"test\", \"validate.csv\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "id": "626e5131-daf2-45a3-94a5-03792828e6a9", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬───────────┬─────────┬──────────────┬────────┬─────────────┐\n", - "│ uuid │ link_type │ cluster │ id │ source │ probability │\n", - "│ int64 │ varchar │ int64 │ varchar │ int64 │ double │\n", - "├───────┼───────────┼─────────┼──────────────┼────────┼─────────────┤\n", - "│ 1 │ link │ 0 │ will_inc_t1 │ 1 │ 1.0 │\n", - "│ 2 │ link │ 1 │ will_inc_t2 │ 2 │ 0.9 │\n", - "│ 3 │ link │ 2 │ will_inc_t2 │ 2 │ 0.7 │\n", - "│ 4 │ link │ 3 │ will_inc_t2 │ 2 │ 0.4 │\n", - "│ 5 │ link │ 4 │ will_inc_t2 │ 2 │ 0.2 │\n", - "│ 6 │ link │ 1 │ will_inc_t3 │ 3 │ 0.8 │\n", - "│ 7 │ link │ 2 │ will_inc_t3 │ 3 │ 0.7 │\n", - "│ 8 │ link │ 3 │ will_inc_t3 │ 3 │ 0.1 │\n", - "│ 9 │ link │ 4 │ will_inc_t3 │ 3 │ 0.3 │\n", - "│ 10 │ link │ 1 │ will_inc_t4 │ 4 │ 0.75 │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ 43 │ link │ 3 │ sarah_inc_t2 │ 2 │ 0.9 │\n", - "│ 44 │ link │ 4 │ sarah_inc_t2 │ 2 │ 0.8 │\n", - "│ 45 │ link │ 1 │ sarah_inc_t3 │ 3 │ 0.1 │\n", - "│ 46 │ link │ 2 │ sarah_inc_t3 │ 3 │ 0.1 │\n", - "│ 47 │ link │ 3 │ sarah_inc_t3 │ 3 │ 0.2 │\n", - "│ 48 │ link │ 4 │ sarah_inc_t3 │ 3 │ 0.1 │\n", - "│ 49 │ link │ 1 │ sarah_inc_t4 │ 4 │ 0.2 │\n", - "│ 50 │ link │ 2 │ sarah_inc_t4 │ 4 │ 0.75 │\n", - "│ 51 │ link │ 3 │ sarah_inc_t4 │ 4 │ 0.6 │\n", - "│ 52 │ link │ 4 │ sarah_inc_t4 │ 4 │ 0.7 │\n", - "├───────┴───────────┴─────────┴──────────────┴────────┴─────────────┤\n", - "│ 52 rows (20 shown) 6 columns │\n", - "└───────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 85, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " *\n", - " from\n", - " prob;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "id": "73f728fc-2a32-4071-8c0a-47334862a6e4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n_seq │ n_par │\n", - "│ int64 │ int64 │ varchar │ int64 │ int64 │ int64 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │ 0 │\n", - "│ 2 │ 1 │ will_inc_t2 │ 2 │ 1 │ 1 │\n", - "│ 3 │ 1 │ will_inc_t3 │ 3 │ 2 │ 1 │\n", - "│ 4 │ 1 │ will_inc_t4 │ 4 │ 3 │ 1 │\n", - "│ 5 │ 2 │ leo_inc_t1 │ 1 │ 0 │ 0 │\n", - "│ 6 │ 2 │ leo_inc_t2 │ 2 │ 1 │ 1 │\n", - "│ 7 │ 2 │ leo_inc_t4 │ 4 │ 3 │ 1 │\n", - "│ 8 │ 3 │ pedro_inc_t1 │ 1 │ 0 │ 0 │\n", - "│ 9 │ 3 │ pedro_inc_t2 │ 2 │ 1 │ 1 │\n", - "│ 9 │ 3 │ pedro_inc_t4 │ 4 │ 3 │ 1 │\n", - "│ 9 │ 4 │ sarah_inc_t1 │ 1 │ 0 │ 0 │\n", - "│ 9 │ 4 │ sarah_inc_t2 │ 2 │ 1 │ 1 │\n", - "│ 9 │ 4 │ sarah_inc_t3 │ 3 │ 2 │ 1 │\n", - "│ 9 │ 4 │ sarah_inc_t4 │ 4 │ 3 │ 1 │\n", - "├───────┴─────────┴──────────────┴────────┴───────┴───────┤\n", - "│ 14 rows 6 columns │\n", - "└─────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " *\n", - " from\n", - " clus;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "id": "15d54092-9be1-4038-806d-94b920c39739", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬─────────┬─────────┐\n", - "│ uuid │ cluster │ id │ source │ user │ match │\n", - "│ int64 │ int64 │ varchar │ int64 │ varchar │ boolean │\n", - "├───────┼─────────┼──────────────┼────────┼─────────┼─────────┤\n", - "│ 1 │ 3 │ pedro_inc_t2 │ 2 │ user01 │ true │\n", - "│ 2 │ 4 │ sarah_inc_t3 │ 3 │ user01 │ true │\n", - "└───────┴─────────┴──────────────┴────────┴─────────┴─────────┘" - ] - }, - "execution_count": 87, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " *\n", - " from\n", - " val;\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "1feb3e0e-e9c6-4aba-8322-17a8c1d95fe9", - "metadata": {}, - "source": [ - "## v1\n", - "\n", - "Having done some fiddling below, here's the core SQL.\n", - "\n", - "Step 1: instantiate clusters (or already have a cluster table)\n", - "Step 2: run this to add any new clusters the probabilities table now holds\n", - "\n", - "Note this notebook DOESN'T handle adding unmatched dimensions to the clusters table as new clusters.\n", - "\n", - "Params:\n", - "\n", - "* n: your current stage in 🔌hybrid additive\n", - "* threshold: the point where we consider a probability a valid match. For parallel, note this means all tables will use the same value, which might not be the optimal value across all tables. In this instance, consider making it an additive table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8c388d94-b300-4613-a7bc-23bc13f787f1", - "metadata": {}, - "outputs": [], - "source": [ - "sql = \"\"\"\n", - " select\n", - " nextval('uuid') as uuid, -- Create UUID in an appropriate way for Postgres\n", - " cluster,\n", - " id,\n", - " source,\n", - " 1 as n,\n", - " from (\n", - " select\n", - " distinct on (prob.cluster, prob.source)\n", - " prob.*\n", - " from\n", - " prob\n", - " anti join clus_init on\n", - " clus_init.id = prob.id\n", - " and clus_init.source = prob.source\n", - " where \n", - " probability > 0.7 -- Should be set by calling function\n", - " and link_type = 'link'\n", - " order by\n", - " probability desc,\n", - " id desc\n", - " )\n", - " union\n", - " select\n", - " *\n", - " from\n", - " clus_init\n", - "\"\"\"" - ] - }, - { - "cell_type": "markdown", - "id": "5026fef1-9e44-420e-bcea-98e20f83fea7", - "metadata": {}, - "source": [ - "## v2\n", - "\n", - "Woah. v1 is completely wrong. I thought making my IDs a bit like names would be helpful but it's just confused me -- the problem is the same ID being matched to loads of clusters and the interactions that emerge from that.\n", - "\n", - "Let's recall why I thought this needed recursion:\n", - "\n", - "* \"will_inc\" is 0.9 for cluster 1 and 0.8 for cluster 2\n", - "* \"wedro_inc\" is 0.8 for cluster 1 and 0.75 for cluster 2\n", - "\n", - "In the current setup, \"will_inc\" goes to cluster 1 and cluster 2 is unmatched.\n", - "\n", - "What we want is \"will_inc\" to go to cluster 1, and the second best option, \"wedro_inc\", to go to cluster 2.\n", - "\n", - "There's some other stuff too.\n", - "\n", - "* How do we handle validated clusters?\n", - "* Swear there was something else...\n", - "\n", - "On validated, I say we add them first -- per round, not in a blob. Let's make a dummy validated table and add its conclusions to the clusters." - ] - }, - { - "cell_type": "markdown", - "id": "4977f55c-5bfa-478d-bfeb-79b124ac438c", - "metadata": {}, - "source": [ - "# Code" - ] - }, - { - "cell_type": "markdown", - "id": "4cb15e8c-d2e7-42da-b29e-a4557c50a09e", - "metadata": {}, - "source": [ - "## v2\n", - "\n", - "Now dealing with:\n", - "\n", - "* Second best matches when the first best masked it\n", - "* Validated clusters" - ] - }, - { - "cell_type": "markdown", - "id": "e70664e1-8ff3-423e-8d99-a9ee2cf97b0b", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "### v2.0\n", - "\n", - "Without recursion or deletion." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "f7fb301b-42c1-43fb-97ea-20c146503180", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "└───────┴─────────┴──────────────┴────────┴───────┘" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_init = duckdb.sql(\"\"\"\n", - " drop sequence if exists uuid;\n", - " drop sequence if exists cluster;\n", - " create sequence uuid start 1;\n", - " create sequence cluster start 1;\n", - " select\n", - " nextval('uuid') as uuid,\n", - " nextval('cluster') as cluster,\n", - " id,\n", - " source,\n", - " 0 as n,\n", - " from\n", - " prob\n", - " where\n", - " cluster = 0\n", - "\"\"\")\n", - "clus_init" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "eca7576c-ee95-4ed8-bde6-6151e71977a8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 5 │ 3 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 6 │ 4 │ sarah_inc_t3 │ 3 │ 1 │\n", - "└───────┴─────────┴──────────────┴────────┴───────┘" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_val = duckdb.sql(\"\"\"\n", - " select\n", - " nextval('uuid') as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " 1 as n,\n", - " from\n", - " val\n", - " where \n", - " source in (\n", - " select\n", - " source\n", - " from\n", - " prob\n", - " )\n", - " union\n", - " select\n", - " *\n", - " from\n", - " clus_init\n", - "\"\"\")\n", - "clus_val" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "id": "1eb28b4b-5b9e-456a-9f8e-763f71d583a2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬───────────┬─────────┬──────────────┬────────┬─────────────┐\n", - "│ uuid │ link_type │ cluster │ id │ source │ probability │\n", - "│ int64 │ varchar │ int64 │ varchar │ int64 │ double │\n", - "├───────┼───────────┼─────────┼──────────────┼────────┼─────────────┤\n", - "│ 2 │ link │ 1 │ will_inc_t2 │ 2 │ 0.9 │\n", - "│ 24 │ link │ 2 │ leo_inc_t4 │ 4 │ 0.9 │\n", - "│ 38 │ link │ 3 │ pedro_inc_t4 │ 4 │ 0.9 │\n", - "│ 16 │ link │ 2 │ leo_inc_t2 │ 2 │ 0.8 │\n", - "│ 6 │ link │ 1 │ will_inc_t3 │ 3 │ 0.8 │\n", - "│ 44 │ link │ 4 │ sarah_inc_t2 │ 2 │ 0.8 │\n", - "│ 10 │ link │ 1 │ will_inc_t4 │ 4 │ 0.75 │\n", - "└───────┴───────────┴─────────┴──────────────┴────────┴─────────────┘" - ] - }, - "execution_count": 111, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " distinct on (agg1.id, agg1.source)\n", - " *\n", - " from (\n", - " select\n", - " distinct on (prob.cluster, prob.source)\n", - " prob.*\n", - " from\n", - " prob\n", - " anti join clus_val cl on\n", - " cl.id = prob.id\n", - " and cl.source = prob.source\n", - " anti join clus_val cl on\n", - " cl.cluster = prob.cluster\n", - " and cl.source = prob.source\n", - " where \n", - " probability >= 0.7\n", - " order by\n", - " probability desc\n", - " ) agg1\n", - " order by\n", - " agg1.probability desc\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 112, - "id": "fddd9619-881e-449b-ac12-13e44a601ab5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 13 │ 3 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 14 │ 4 │ sarah_inc_t3 │ 3 │ 1 │\n", - "│ 17 │ 1 │ will_inc_t2 │ 2 │ 1 │\n", - "│ 18 │ 2 │ leo_inc_t4 │ 4 │ 1 │\n", - "│ 19 │ 3 │ pedro_inc_t4 │ 4 │ 1 │\n", - "│ 20 │ 2 │ leo_inc_t2 │ 2 │ 1 │\n", - "│ 21 │ 1 │ will_inc_t3 │ 3 │ 1 │\n", - "│ 23 │ 4 │ sarah_inc_t2 │ 2 │ 1 │\n", - "│ 24 │ 1 │ will_inc_t4 │ 4 │ 1 │\n", - "├───────┴─────────┴──────────────┴────────┴───────┤\n", - "│ 13 rows 5 columns │\n", - "└─────────────────────────────────────────────────┘" - ] - }, - "execution_count": 112, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_1 = duckdb.sql(\"\"\"\n", - " select\n", - " distinct on (agg.id, agg.source)\n", - " nextval('uuid') as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " 1 as n,\n", - " from (\n", - " select\n", - " distinct on (prob.cluster, prob.source)\n", - " prob.*\n", - " from\n", - " prob\n", - " anti join clus_val cl on\n", - " cl.id = prob.id\n", - " and cl.source = prob.source\n", - " anti join clus_val cl on\n", - " cl.cluster = prob.cluster\n", - " and cl.source = prob.source\n", - " where \n", - " probability >= 0.7\n", - " order by\n", - " probability desc\n", - " ) agg\n", - " union\n", - " select\n", - " *\n", - " from\n", - " clus_val\n", - "\"\"\")\n", - "clus_1" - ] - }, - { - "cell_type": "code", - "execution_count": 113, - "id": "b7501298-5205-42bd-b1ba-80d8ce40eb3f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬───────────┬─────────┬──────────────┬────────┬─────────────┐\n", - "│ uuid │ link_type │ cluster │ id │ source │ probability │\n", - "│ int64 │ varchar │ int64 │ varchar │ int64 │ double │\n", - "├───────┼───────────┼─────────┼──────────────┼────────┼─────────────┤\n", - "│ 52 │ link │ 4 │ sarah_inc_t4 │ 4 │ 0.7 │\n", - "└───────┴───────────┴─────────┴──────────────┴────────┴─────────────┘" - ] - }, - "execution_count": 113, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "┌────────────────────┬───────────────────────────────────────────┬──────────────┐\n", - "│ count(DISTINCT id) │ count(DISTINCT concat(\"cluster\", source)) │ count_star() │\n", - "│ int64 │ int64 │ int64 │\n", - "├────────────────────┼───────────────────────────────────────────┼──────────────┤\n", - "│ 13 │ 13 │ 13 │\n", - "└────────────────────┴───────────────────────────────────────────┴──────────────┘" - ] - }, - "execution_count": 113, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 77 │ 1 │ will_inc_t2 │ 2 │ 1 │\n", - "│ 81 │ 1 │ will_inc_t3 │ 3 │ 1 │\n", - "│ 84 │ 1 │ will_inc_t4 │ 4 │ 1 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 78 │ 2 │ leo_inc_t4 │ 4 │ 1 │\n", - "│ 80 │ 2 │ leo_inc_t2 │ 2 │ 1 │\n", - "│ 3 │ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 75 │ 3 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 79 │ 3 │ pedro_inc_t4 │ 4 │ 1 │\n", - "│ 4 │ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 76 │ 4 │ sarah_inc_t3 │ 3 │ 1 │\n", - "│ 83 │ 4 │ sarah_inc_t2 │ 2 │ 1 │\n", - "├───────┴─────────┴──────────────┴────────┴───────┤\n", - "│ 13 rows 5 columns │\n", - "└─────────────────────────────────────────────────┘" - ] - }, - "execution_count": 113, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " distinct on (agg.id, agg.source)\n", - " agg.*\n", - " from (\n", - " select\n", - " distinct on (prob.cluster, prob.source)\n", - " prob.*\n", - " from\n", - " prob\n", - " anti join clus_1 cl on\n", - " cl.id = prob.id\n", - " and cl.source = prob.source\n", - " anti join clus_1 cl on\n", - " cl.cluster = prob.cluster\n", - " and cl.source = prob.source\n", - " where \n", - " probability >= 0.7\n", - " order by\n", - " probability desc\n", - " ) agg\n", - "\"\"\")\n", - "duckdb.sql(\"\"\"\n", - " select\n", - " count(distinct id),\n", - " count(distinct concat(cluster, source)),\n", - " count(*)\n", - " from\n", - " clus_1\n", - "\"\"\")\n", - "duckdb.sql(\"\"\"\n", - " select\n", - " *\n", - " from\n", - " clus_1\n", - " order by\n", - " cluster\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 114, - "id": "9748acfb-5be0-45ab-bc48-e45c9a1bbb19", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 100 │ 3 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 101 │ 4 │ sarah_inc_t3 │ 3 │ 1 │\n", - "│ 122 │ 1 │ will_inc_t2 │ 2 │ 1 │\n", - "│ 123 │ 2 │ leo_inc_t4 │ 4 │ 1 │\n", - "│ 124 │ 3 │ pedro_inc_t4 │ 4 │ 1 │\n", - "│ 125 │ 2 │ leo_inc_t2 │ 2 │ 1 │\n", - "│ 126 │ 1 │ will_inc_t3 │ 3 │ 1 │\n", - "│ 128 │ 4 │ sarah_inc_t2 │ 2 │ 1 │\n", - "│ 129 │ 1 │ will_inc_t4 │ 4 │ 1 │\n", - "│ 131 │ 4 │ sarah_inc_t4 │ 4 │ 1 │\n", - "├───────┴─────────┴──────────────┴────────┴───────┤\n", - "│ 14 rows 5 columns │\n", - "└─────────────────────────────────────────────────┘" - ] - }, - "execution_count": 114, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_2 = duckdb.sql(\"\"\"\n", - " select\n", - " distinct on (agg.id, agg.source)\n", - " nextval('uuid') as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " 1 as n,\n", - " from (\n", - " select\n", - " distinct on (prob.cluster, prob.source)\n", - " prob.*\n", - " from\n", - " prob\n", - " anti join clus_1 cl on\n", - " cl.id = prob.id\n", - " and cl.source = prob.source\n", - " anti join clus_1 cl on\n", - " cl.cluster = prob.cluster\n", - " and cl.source = prob.source\n", - " where \n", - " probability >= 0.7\n", - " order by\n", - " probability desc\n", - " ) agg\n", - " union\n", - " select\n", - " *\n", - " from\n", - " clus_1\n", - "\"\"\")\n", - "clus_2" - ] - }, - { - "cell_type": "code", - "execution_count": 115, - "id": "22206d48-9c1c-4a02-a2fb-dd0610a0780c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 184 │ 3 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 185 │ 4 │ sarah_inc_t3 │ 3 │ 1 │\n", - "│ 258 │ 1 │ will_inc_t2 │ 2 │ 1 │\n", - "│ 259 │ 2 │ leo_inc_t4 │ 4 │ 1 │\n", - "│ 260 │ 3 │ pedro_inc_t4 │ 4 │ 1 │\n", - "│ 261 │ 2 │ leo_inc_t2 │ 2 │ 1 │\n", - "│ 262 │ 1 │ will_inc_t3 │ 3 │ 1 │\n", - "│ 264 │ 4 │ sarah_inc_t2 │ 2 │ 1 │\n", - "│ 265 │ 1 │ will_inc_t4 │ 4 │ 1 │\n", - "│ 269 │ 4 │ sarah_inc_t4 │ 4 │ 1 │\n", - "├───────┴─────────┴──────────────┴────────┴───────┤\n", - "│ 14 rows 5 columns │\n", - "└─────────────────────────────────────────────────┘" - ] - }, - "execution_count": 115, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_3 = duckdb.sql(\"\"\"\n", - " select\n", - " distinct on (agg.id, agg.source)\n", - " nextval('uuid') as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " 1 as n,\n", - " from (\n", - " select\n", - " distinct on (prob.cluster, prob.source)\n", - " prob.*\n", - " from\n", - " prob\n", - " anti join clus_2 cl on\n", - " cl.id = prob.id\n", - " and cl.source = prob.source\n", - " anti join clus_2 cl on\n", - " cl.cluster = prob.cluster\n", - " and cl.source = prob.source\n", - " where \n", - " probability >= 0.7\n", - " order by\n", - " probability desc\n", - " ) agg\n", - " union\n", - " select\n", - " *\n", - " from\n", - " clus_2\n", - "\"\"\")\n", - "clus_3" - ] - }, - { - "cell_type": "code", - "execution_count": 116, - "id": "d14ecdbe-f453-4081-87cd-7088d0c8c20a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 116, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "execution_count": 116, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_check_l = duckdb.sql(\"\"\"\n", - " select\n", - " cluster,\n", - " id,\n", - " source,\n", - " n::int as n\n", - " from\n", - " clus_3\n", - " order by\n", - " cluster,\n", - " source,\n", - " id,\n", - " n\n", - "\"\"\")\n", - "clus_check_r = duckdb.sql(\"\"\"\n", - " select\n", - " cluster,\n", - " id,\n", - " source,\n", - " n_par::int as n\n", - " from\n", - " clus\n", - " order by\n", - " cluster,\n", - " source,\n", - " id,\n", - " n_par\n", - "\"\"\")\n", - "clus_check_l.df().equals(clus_check_r.df())\n", - "clus_check_l.df().compare(clus_check_r.df())" - ] - }, - { - "cell_type": "markdown", - "id": "e7828c5f-731d-49e2-8805-52ab83c83945", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "### v2.1\n", - "\n", - "Let's recurse 👹" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "48736a48-0082-4f9e-bf24-8bdb8ed9e0f4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 11 │ 3 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 12 │ 4 │ sarah_inc_t3 │ 3 │ 1 │\n", - "└───────┴─────────┴──────────────┴────────┴───────┘" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " n,\n", - " from \n", - " clus_val\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "752031b0-df28-451e-9d2a-207f536affb0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │ step │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │ 1 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │ 1 │\n", - "│ 3 │ 3 │ pedro_inc_t1 │ 1 │ 0 │ 1 │\n", - "│ 4 │ 4 │ sarah_inc_t1 │ 1 │ 0 │ 1 │\n", - "│ 36712 │ 3 │ pedro_inc_t2 │ 2 │ 1 │ 1 │\n", - "│ 36713 │ 4 │ sarah_inc_t3 │ 3 │ 1 │ 1 │\n", - "│ 36714 │ 1 │ will_inc_t2 │ 2 │ 1 │ 2 │\n", - "│ 36715 │ 2 │ leo_inc_t4 │ 4 │ 1 │ 2 │\n", - "│ 36716 │ 3 │ pedro_inc_t4 │ 4 │ 1 │ 2 │\n", - "│ 36717 │ 2 │ leo_inc_t2 │ 2 │ 1 │ 2 │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ 48921 │ 1 │ will_inc_t3 │ 3 │ 1 │ 1112 │\n", - "│ 48915 │ 1 │ will_inc_t2 │ 2 │ 1 │ 1112 │\n", - "│ 48918 │ 3 │ sarah_inc_t2 │ 2 │ 1 │ 1112 │\n", - "│ 48920 │ 2 │ leo_inc_t2 │ 2 │ 1 │ 1112 │\n", - "│ 48917 │ 3 │ pedro_inc_t4 │ 4 │ 1 │ 1112 │\n", - "│ 48923 │ 1 │ will_inc_t4 │ 4 │ 1 │ 1112 │\n", - "│ 48914 │ 0 │ will_inc_t1 │ 1 │ 1 │ 1112 │\n", - "│ 48925 │ 0 │ will_inc_t1 │ 1 │ 1 │ 1113 │\n", - "│ 48926 │ 1 │ will_inc_t2 │ 2 │ 1 │ 1113 │\n", - "│ 48928 │ 3 │ pedro_inc_t4 │ 4 │ 1 │ 1113 │\n", - "├───────┴─────────┴──────────────┴────────┴───────┴───────┤\n", - "│ ? rows (>9999 rows, 20 shown) 6 columns │\n", - "└─────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " with recursive clusters as (\n", - " select\n", - " uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " n,\n", - " 1 as step\n", - " from \n", - " clus_val\n", - " union\n", - " select distinct on (agg.id, agg.source)\n", - " nextval('uuid') as uuid,\n", - " agg.cluster,\n", - " agg.id,\n", - " agg.source,\n", - " 1 as n,\n", - " step + 1\n", - " from (\n", - " select distinct on (p.cluster, p.source)\n", - " p.*, (select max(step) from clusters) step\n", - " from\n", - " prob p\n", - " where not exists (\n", - " select 1 from\n", - " clusters cl\n", - " where cl.id = p.id and cl.source = p.source and cl.step < 3\n", - " ) and \n", - " not exists (\n", - " select\n", - " 1\n", - " from\n", - " clusters cl\n", - " where cl.cluster = p.cluster and cl.source = p.source and cl.step < 3\n", - " ) \n", - " and\n", - " p.probability >= 0.7\n", - " order by\n", - " p.probability desc\n", - " ) agg\n", - " )\n", - " select\n", - " *\n", - " from\n", - " clusters \n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "8b276edb-8bfd-4edb-a623-31ee105f2a95", - "metadata": {}, - "source": [ - "### v2.2\n", - "\n", - "Michał says that because the theoretical limit of the recursion is so high, we may end up with large blocking queries and hitting database settings limits. This is bad for performance and bugfixing. Suggests two tables and `delete returning`. I think this is a solid suggestion." - ] - }, - { - "cell_type": "code", - "execution_count": 138, - "id": "7141f495-c22c-4992-9df6-592e95e118fc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "└───────┴─────────┴──────────────┴────────┴───────┘" - ] - }, - "execution_count": 138, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_init = duckdb.sql(\"\"\"\n", - " drop sequence if exists uuid;\n", - " drop sequence if exists cluster;\n", - " create sequence uuid start 1;\n", - " create sequence cluster start 1;\n", - " select\n", - " nextval('uuid') as uuid,\n", - " nextval('cluster') as cluster,\n", - " id,\n", - " source,\n", - " 0 as n,\n", - " from\n", - " prob\n", - " where\n", - " cluster = 0\n", - "\"\"\")\n", - "clus_init" - ] - }, - { - "cell_type": "code", - "execution_count": 139, - "id": "8641e015-c5bf-4b17-821d-69f3f9e896a3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 5 │ 3 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 6 │ 4 │ sarah_inc_t3 │ 3 │ 1 │\n", - "└───────┴─────────┴──────────────┴────────┴───────┘" - ] - }, - "execution_count": 139, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_val = duckdb.sql(\"\"\"\n", - " select\n", - " nextval('uuid') as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " 1 as n,\n", - " from\n", - " val\n", - " where \n", - " source in (\n", - " select\n", - " source\n", - " from\n", - " prob\n", - " )\n", - " union\n", - " select\n", - " *\n", - " from\n", - " clus_init\n", - "\"\"\")\n", - "clus_val" - ] - }, - { - "cell_type": "code", - "execution_count": 143, - "id": "891311da-80b3-4759-ac2a-5ac6749d2499", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 5 │ 3 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 6 │ 4 │ sarah_inc_t3 │ 3 │ 1 │\n", - "└───────┴─────────┴──────────────┴────────┴───────┘" - ] - }, - "execution_count": 143, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " drop sequence if exists uuid;\n", - " drop sequence if exists cluster;\n", - " create sequence uuid start 1;\n", - " create sequence cluster start 1;\n", - " select\n", - " nextval('uuid') as uuid,\n", - " nextval('cluster') as cluster,\n", - " id,\n", - " source,\n", - " 0 as n,\n", - " from\n", - " prob\n", - " where\n", - " cluster = 0\n", - " union\n", - " select\n", - " nextval('uuid') as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " 1 as n,\n", - " from\n", - " val\n", - " where \n", - " source in (\n", - " select\n", - " source\n", - " from\n", - " prob\n", - " )\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "ec1a4222-3c21-4458-ad78-e9c776181183", - "metadata": {}, - "source": [ - "We want to:\n", - "\n", - "1. Create a temporary `probability` table with every candidate value\n", - "2. Antijoin on `clusters` to get the bits we want to insert\n", - "3. If this contains values\n", - " 1. `delete returning` into `clusters`\n", - " 2. Go back to 2.\n", - "4. If the result is empty, done" - ] - }, - { - "cell_type": "code", - "execution_count": 140, - "id": "2f40b541-3cf1-4329-aa6f-7c9254ee421e", - "metadata": {}, - "outputs": [], - "source": [ - "duckdb.sql(\"\"\"\n", - " drop table if exists probabilities_temp;\n", - " drop table if exists clusters_temp;\n", - " \n", - " create temp table probabilities_temp as\n", - " select\n", - " uuid,\n", - " link_type,\n", - " cluster,\n", - " id,\n", - " source,\n", - " probability\n", - " from\n", - " prob prob\n", - " where \n", - " prob.probability >= 0.7\n", - " and cluster != 0\n", - " order by\n", - " probability desc;\n", - " \n", - " create temp table clusters_temp as\n", - " select\n", - " uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " n,\n", - " from\n", - " clus_val;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 141, - "id": "cad43948-60dc-4570-b30b-25d06e81971a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 7 │ 3 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 8 │ 4 │ sarah_inc_t3 │ 3 │ 1 │\n", - "└───────┴─────────┴──────────────┴────────┴───────┘" - ] - }, - "execution_count": 141, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "┌───────┬───────────┬─────────┬──────────────┬────────┬─────────────┐\n", - "│ uuid │ link_type │ cluster │ id │ source │ probability │\n", - "│ int64 │ varchar │ int64 │ varchar │ int64 │ double │\n", - "├───────┼───────────┼─────────┼──────────────┼────────┼─────────────┤\n", - "│ 2 │ link │ 1 │ will_inc_t2 │ 2 │ 0.9 │\n", - "│ 24 │ link │ 2 │ leo_inc_t4 │ 4 │ 0.9 │\n", - "│ 31 │ link │ 4 │ pedro_inc_t2 │ 2 │ 0.9 │\n", - "│ 38 │ link │ 3 │ pedro_inc_t4 │ 4 │ 0.9 │\n", - "│ 43 │ link │ 3 │ sarah_inc_t2 │ 2 │ 0.9 │\n", - "│ 25 │ link │ 3 │ leo_inc_t4 │ 4 │ 0.85 │\n", - "│ 6 │ link │ 1 │ will_inc_t3 │ 3 │ 0.8 │\n", - "│ 16 │ link │ 2 │ leo_inc_t2 │ 2 │ 0.8 │\n", - "│ 19 │ link │ 1 │ leo_inc_t3 │ 3 │ 0.8 │\n", - "│ 26 │ link │ 4 │ leo_inc_t4 │ 4 │ 0.8 │\n", - "│ 39 │ link │ 4 │ pedro_inc_t4 │ 4 │ 0.8 │\n", - "│ 44 │ link │ 4 │ sarah_inc_t2 │ 2 │ 0.8 │\n", - "│ 10 │ link │ 1 │ will_inc_t4 │ 4 │ 0.75 │\n", - "│ 50 │ link │ 2 │ sarah_inc_t4 │ 4 │ 0.75 │\n", - "│ 3 │ link │ 2 │ will_inc_t2 │ 2 │ 0.7 │\n", - "│ 7 │ link │ 2 │ will_inc_t3 │ 3 │ 0.7 │\n", - "│ 15 │ link │ 1 │ leo_inc_t2 │ 2 │ 0.7 │\n", - "│ 23 │ link │ 1 │ leo_inc_t4 │ 4 │ 0.7 │\n", - "│ 30 │ link │ 3 │ pedro_inc_t2 │ 2 │ 0.7 │\n", - "│ 42 │ link │ 2 │ sarah_inc_t2 │ 2 │ 0.7 │\n", - "│ 52 │ link │ 4 │ sarah_inc_t4 │ 4 │ 0.7 │\n", - "├───────┴───────────┴─────────┴──────────────┴────────┴─────────────┤\n", - "│ 21 rows 6 columns │\n", - "└───────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 141, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select * from clusters_temp;\n", - "\"\"\")\n", - "duckdb.sql(\"\"\"\n", - " select * from probabilities_temp;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 142, - "id": "de7c4b00-6bdb-4f9a-9aad-def8f833bd5f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 9 │ 1 │ will_inc_t2 │ 2 │ 1 │\n", - "│ 10 │ 2 │ leo_inc_t4 │ 4 │ 1 │\n", - "│ 11 │ 3 │ pedro_inc_t4 │ 4 │ 1 │\n", - "│ 12 │ 3 │ sarah_inc_t2 │ 2 │ 1 │\n", - "│ 13 │ 4 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 14 │ 1 │ will_inc_t3 │ 3 │ 1 │\n", - "│ 15 │ 2 │ leo_inc_t2 │ 2 │ 1 │\n", - "│ 17 │ 1 │ will_inc_t4 │ 4 │ 1 │\n", - "└───────┴─────────┴──────────────┴────────┴───────┘" - ] - }, - "execution_count": 142, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_1 = duckdb.sql(\"\"\"\n", - " select\n", - " distinct on (agg.id, agg.source)\n", - " nextval('uuid') as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " 1 as n,\n", - " from (\n", - " select\n", - " distinct on (prob.cluster, prob.source)\n", - " prob.*\n", - " from\n", - " probabilities_temp prob\n", - " where \n", - " not exists (\n", - " select\n", - " id,\n", - " source\n", - " from\n", - " clusters_temp clus\n", - " where\n", - " clus.id = prob.id\n", - " and clus.source = prob.source\n", - " )\n", - " or not exists (\n", - " select\n", - " cluster,\n", - " source\n", - " from\n", - " clusters_temp clus\n", - " where\n", - " clus.cluster = prob.cluster\n", - " and clus.source = prob.source\n", - " )\n", - " order by\n", - " probability desc\n", - " ) agg;\n", - "\"\"\")\n", - "clus_1" - ] - }, - { - "cell_type": "code", - "execution_count": 145, - "id": "3b7dffc3-84b5-4cc5-b348-622d7919ed6e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "8" - ] - }, - "execution_count": 145, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(clus_1.df().index)" - ] - }, - { - "cell_type": "code", - "execution_count": 135, - "id": "0652f473-b798-42f6-b9d9-53f1f7bd7e3b", - "metadata": {}, - "outputs": [], - "source": [ - "duckdb.sql(\"\"\"\n", - " insert into clusters_temp \n", - " select\n", - " uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " n,\n", - " from\n", - " clus_1;\n", - "\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 114, - "id": "878f43cf-6a19-482f-b9f7-20f2881ec2e2", - "metadata": {}, - "outputs": [], - "source": [ - "duckdb.sql(\"\"\"\n", - " delete from probabilities_temp prob_temp\n", - " where exists (\n", - " select \n", - " cl.cluster,\n", - " cl.id,\n", - " cl.source\n", - " from \n", - " clus_1 cl\n", - " where\n", - " (\n", - " cl.id = prob_temp.id\n", - " and cl.source = prob_temp.source\n", - " ) or (\n", - " cl.cluster = prob_temp.cluster\n", - " and cl.source = prob_temp.source\n", - " )\n", - " );\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "id": "216fa51a-a92d-4d7a-b50d-d1a62dd569a6", - "metadata": {}, - "outputs": [], - "source": [ - "duckdb.sql(\"\"\"\n", - " delete from probabilities_temp prob_temp\n", - " where exists (\n", - " select \n", - " cl.cluster,\n", - " cl.id,\n", - " cl.source\n", - " from \n", - " clus_1 cl\n", - " where\n", - " cl.id = prob_temp.id\n", - " and cl.cluster = prob_temp.cluster\n", - " and cl.source = prob_temp.source\n", - " );\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 136, - "id": "5516e6df-80c5-4e50-9434-4f3bd424c08f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 11 │ 3 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 12 │ 4 │ sarah_inc_t3 │ 3 │ 1 │\n", - "│ 24 │ 0 │ will_inc_t1 │ 1 │ 1 │\n", - "│ 25 │ 1 │ will_inc_t2 │ 2 │ 1 │\n", - "│ 26 │ 2 │ leo_inc_t4 │ 4 │ 1 │\n", - "│ 27 │ 3 │ pedro_inc_t4 │ 4 │ 1 │\n", - "│ 28 │ 3 │ sarah_inc_t2 │ 2 │ 1 │\n", - "│ 29 │ 4 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 30 │ 1 │ will_inc_t3 │ 3 │ 1 │\n", - "│ 31 │ 2 │ leo_inc_t2 │ 2 │ 1 │\n", - "│ 33 │ 1 │ will_inc_t4 │ 4 │ 1 │\n", - "├───────┴─────────┴──────────────┴────────┴───────┤\n", - "│ 15 rows 5 columns │\n", - "└─────────────────────────────────────────────────┘" - ] - }, - "execution_count": 136, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "┌───────┬───────────┬─────────┬──────────────┬────────┬─────────────┐\n", - "│ uuid │ link_type │ cluster │ id │ source │ probability │\n", - "│ int64 │ varchar │ int64 │ varchar │ int64 │ double │\n", - "├───────┼───────────┼─────────┼──────────────┼────────┼─────────────┤\n", - "│ 1 │ link │ 0 │ will_inc_t1 │ 1 │ 1.0 │\n", - "│ 14 │ link │ 0 │ leo_inc_t1 │ 1 │ 1.0 │\n", - "│ 27 │ link │ 0 │ pedro_inc_t1 │ 1 │ 1.0 │\n", - "│ 40 │ link │ 0 │ sarah_inc_t1 │ 1 │ 1.0 │\n", - "│ 2 │ link │ 1 │ will_inc_t2 │ 2 │ 0.9 │\n", - "│ 24 │ link │ 2 │ leo_inc_t4 │ 4 │ 0.9 │\n", - "│ 38 │ link │ 3 │ pedro_inc_t4 │ 4 │ 0.9 │\n", - "│ 43 │ link │ 3 │ sarah_inc_t2 │ 2 │ 0.9 │\n", - "│ 31 │ link │ 4 │ pedro_inc_t2 │ 2 │ 0.9 │\n", - "│ 25 │ link │ 3 │ leo_inc_t4 │ 4 │ 0.85 │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ 39 │ link │ 4 │ pedro_inc_t4 │ 4 │ 0.8 │\n", - "│ 44 │ link │ 4 │ sarah_inc_t2 │ 2 │ 0.8 │\n", - "│ 10 │ link │ 1 │ will_inc_t4 │ 4 │ 0.75 │\n", - "│ 50 │ link │ 2 │ sarah_inc_t4 │ 4 │ 0.75 │\n", - "│ 3 │ link │ 2 │ will_inc_t2 │ 2 │ 0.7 │\n", - "│ 7 │ link │ 2 │ will_inc_t3 │ 3 │ 0.7 │\n", - "│ 15 │ link │ 1 │ leo_inc_t2 │ 2 │ 0.7 │\n", - "│ 23 │ link │ 1 │ leo_inc_t4 │ 4 │ 0.7 │\n", - "│ 42 │ link │ 2 │ sarah_inc_t2 │ 2 │ 0.7 │\n", - "│ 52 │ link │ 4 │ sarah_inc_t4 │ 4 │ 0.7 │\n", - "├───────┴───────────┴─────────┴──────────────┴────────┴─────────────┤\n", - "│ 24 rows (20 shown) 6 columns │\n", - "└───────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 136, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select * from clusters_temp;\n", - "\"\"\")\n", - "duckdb.sql(\"\"\"\n", - " select * from probabilities_temp;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 127, - "id": "7feaf716-35a4-42c4-a0fe-55dc32696428", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 127, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
clusteridsourcen
selfotherselfotherselfotherselfother
9NaNNaNsarah_inc_t2pedro_inc_t42.04.0NaNNaN
103.04.0pedro_inc_t4sarah_inc_t14.01.01.00.0
11NaNNaNsarah_inc_t1sarah_inc_t21.02.00.01.0
13NaNNaNpedro_inc_t4sarah_inc_t4NaNNaNNaNNaN
\n", - "
" - ], - "text/plain": [ - " cluster id source n \n", - " self other self other self other self other\n", - "9 NaN NaN sarah_inc_t2 pedro_inc_t4 2.0 4.0 NaN NaN\n", - "10 3.0 4.0 pedro_inc_t4 sarah_inc_t1 4.0 1.0 1.0 0.0\n", - "11 NaN NaN sarah_inc_t1 sarah_inc_t2 1.0 2.0 0.0 1.0\n", - "13 NaN NaN pedro_inc_t4 sarah_inc_t4 NaN NaN NaN NaN" - ] - }, - "execution_count": 127, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_check_l = duckdb.sql(\"\"\"\n", - " select\n", - " cluster,\n", - " id,\n", - " source,\n", - " n::int as n\n", - " from\n", - " clusters_temp\n", - " order by\n", - " cluster,\n", - " source,\n", - " id,\n", - " n\n", - "\"\"\")\n", - "clus_check_r = duckdb.sql(\"\"\"\n", - " select\n", - " cluster,\n", - " id,\n", - " source,\n", - " n_par::int as n\n", - " from\n", - " clus\n", - " order by\n", - " cluster,\n", - " source,\n", - " id,\n", - " n_par\n", - "\"\"\")\n", - "clus_check_l.df().equals(clus_check_r.df())\n", - "clus_check_l.df().compare(clus_check_r.df())" - ] - }, - { - "cell_type": "code", - "execution_count": 128, - "id": "a61915f6-d977-436a-af91-ca9e4de99757", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌─────────┬──────────────┬────────┬───────┐\n", - "│ cluster │ id │ source │ n │\n", - "│ int64 │ varchar │ int64 │ int32 │\n", - "├─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 1 │ will_inc_t2 │ 2 │ 1 │\n", - "│ 1 │ will_inc_t3 │ 3 │ 1 │\n", - "│ 1 │ will_inc_t4 │ 4 │ 1 │\n", - "│ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ leo_inc_t2 │ 2 │ 1 │\n", - "│ 2 │ leo_inc_t4 │ 4 │ 1 │\n", - "│ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 3 │ sarah_inc_t2 │ 2 │ 1 │\n", - "│ 3 │ pedro_inc_t4 │ 4 │ 1 │\n", - "│ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ sarah_inc_t3 │ 3 │ 1 │\n", - "│ 4 │ pedro_inc_t4 │ 4 │ 1 │\n", - "├─────────┴──────────────┴────────┴───────┤\n", - "│ 14 rows 4 columns │\n", - "└─────────────────────────────────────────┘" - ] - }, - "execution_count": 128, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "┌─────────┬──────────────┬────────┬───────┐\n", - "│ cluster │ id │ source │ n │\n", - "│ int64 │ varchar │ int64 │ int32 │\n", - "├─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 1 │ will_inc_t2 │ 2 │ 1 │\n", - "│ 1 │ will_inc_t3 │ 3 │ 1 │\n", - "│ 1 │ will_inc_t4 │ 4 │ 1 │\n", - "│ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ leo_inc_t2 │ 2 │ 1 │\n", - "│ 2 │ leo_inc_t4 │ 4 │ 1 │\n", - "│ 3 │ pedro_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 3 │ pedro_inc_t4 │ 4 │ 1 │\n", - "│ 4 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ sarah_inc_t2 │ 2 │ 1 │\n", - "│ 4 │ sarah_inc_t3 │ 3 │ 1 │\n", - "│ 4 │ sarah_inc_t4 │ 4 │ 1 │\n", - "├─────────┴──────────────┴────────┴───────┤\n", - "│ 14 rows 4 columns │\n", - "└─────────────────────────────────────────┘" - ] - }, - "execution_count": 128, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_check_l\n", - "clus_check_r" - ] - }, - { - "cell_type": "markdown", - "id": "3c3846aa-3ed1-4785-b771-c654f8427408", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, - "source": [ - "## v1\n", - "\n", - "I've made the data more complicated so the below won't now evaluate to True." - ] - }, - { - "cell_type": "markdown", - "id": "abf44d31-efed-4432-8256-64eb023654c8", - "metadata": {}, - "source": [ - "### Parallel\n", - "\n", - "Sometimes we might join several tables to `probabilities` at once, then add them to `clusters` together." - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "id": "dc0ba81e-6ed0-48ed-9c37-3ed2bae2a91e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ sarah_inc_t1 │ 1 │ 0 │\n", - "└───────┴─────────┴──────────────┴────────┴───────┘" - ] - }, - "execution_count": 120, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_init = duckdb.sql(\"\"\"\n", - " drop sequence if exists uuid;\n", - " drop sequence if exists cluster;\n", - " create sequence uuid start 1;\n", - " create sequence cluster start 1;\n", - " select\n", - " nextval('uuid') as uuid,\n", - " nextval('cluster') as cluster,\n", - " id,\n", - " source,\n", - " 0 as n,\n", - " from\n", - " prob\n", - " where\n", - " cluster = 0\n", - "\"\"\")\n", - "clus_init" - ] - }, - { - "cell_type": "code", - "execution_count": 121, - "id": "87d76974-2408-46d4-9152-59ea7c78111a", - "metadata": {}, - "outputs": [], - "source": [ - "clus_complete = duckdb.sql(\"\"\"\n", - " select\n", - " nextval('uuid') as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " 1 as n,\n", - " from (\n", - " select\n", - " distinct on (prob.cluster, prob.source)\n", - " prob.*\n", - " from\n", - " prob\n", - " anti join clus_init cl on\n", - " cl.id = prob.id\n", - " and cl.source = prob.source\n", - " where \n", - " probability > 0.7\n", - " order by\n", - " probability desc,\n", - " id desc\n", - " )\n", - " union\n", - " select\n", - " *\n", - " from\n", - " clus_init\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 122, - "id": "132a8399-0b40-4827-ae02-4e97d69cb74b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 122, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "execution_count": 122, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_check_l = duckdb.sql(\"\"\"\n", - " select\n", - " cluster,\n", - " id,\n", - " source,\n", - " n::int as n\n", - " from\n", - " clus_complete\n", - " order by\n", - " cluster,\n", - " source,\n", - " id,\n", - " n\n", - "\"\"\")\n", - "clus_check_r = duckdb.sql(\"\"\"\n", - " select\n", - " cluster,\n", - " id,\n", - " source,\n", - " n_par::int as n\n", - " from\n", - " clus\n", - " order by\n", - " cluster,\n", - " source,\n", - " id,\n", - " n_par\n", - "\"\"\")\n", - "clus_check_l.df().equals(clus_check_r.df())\n", - "clus_check_l.df().compare(clus_check_r.df())" - ] - }, - { - "cell_type": "markdown", - "id": "d6538182-58d7-4cc7-a238-572b43a4d655", - "metadata": {}, - "source": [ - "### Sequential\n", - "\n", - "Sometimes we'll add one table to `probabilities`, then resolve to `clusters`, then do that over and over." - ] - }, - { - "cell_type": "code", - "execution_count": 132, - "id": "1dd5c01f-2210-4781-bfd9-acfa94f7cc3d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ sarah_inc_t1 │ 1 │ 0 │\n", - "└───────┴─────────┴──────────────┴────────┴───────┘" - ] - }, - "execution_count": 132, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_init = duckdb.sql(\"\"\"\n", - " drop sequence if exists uuid;\n", - " drop sequence if exists cluster;\n", - " create sequence uuid start 1;\n", - " create sequence cluster start 1;\n", - " select\n", - " nextval('uuid') as uuid,\n", - " nextval('cluster') as cluster,\n", - " id,\n", - " source,\n", - " 0 as n,\n", - " from\n", - " prob\n", - " where\n", - " cluster = 0\n", - "\"\"\")\n", - "clus_init" - ] - }, - { - "cell_type": "code", - "execution_count": 133, - "id": "dcdbfc77-7bce-49ba-8564-9a25a0790649", - "metadata": {}, - "outputs": [], - "source": [ - "prob_n1 = duckdb.sql(\"\"\"\n", - " select\n", - " *\n", - " from\n", - " prob\n", - " where\n", - " source = 2\n", - "\"\"\")\n", - "prob_n2 = duckdb.sql(\"\"\"\n", - " select\n", - " *\n", - " from\n", - " prob\n", - " where\n", - " source = 3\n", - "\"\"\")\n", - "prob_n3 = duckdb.sql(\"\"\"\n", - " select\n", - " *\n", - " from\n", - " prob\n", - " where\n", - " source = 4\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 134, - "id": "c0f0bae9-1358-472e-8e5b-82445aabacf7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 4 │ 1 │ will_inc_t2 │ 2 │ 1 │\n", - "│ 5 │ 4 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 6 │ 2 │ leo_inc_t2 │ 2 │ 1 │\n", - "└───────┴─────────┴──────────────┴────────┴───────┘" - ] - }, - "execution_count": 134, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_n1 = duckdb.sql(\"\"\"\n", - " select\n", - " nextval('uuid') as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " 1 as n,\n", - " from (\n", - " select\n", - " distinct on (prob.cluster, prob.source)\n", - " prob.*\n", - " from\n", - " prob_n1 prob\n", - " anti join clus_init cl on\n", - " cl.id = prob.id\n", - " and cl.source = prob.source\n", - " where\n", - " probability > 0.7\n", - " order by\n", - " probability desc,\n", - " id desc\n", - " )\n", - " union\n", - " select\n", - " *\n", - " from\n", - " clus_init\n", - "\"\"\")\n", - "clus_n1" - ] - }, - { - "cell_type": "code", - "execution_count": 135, - "id": "96f72767-0493-4e73-b435-a88243a0f32e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 10 │ 1 │ will_inc_t2 │ 2 │ 1 │\n", - "│ 11 │ 4 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 12 │ 2 │ leo_inc_t2 │ 2 │ 1 │\n", - "│ 13 │ 1 │ will_inc_t3 │ 3 │ 2 │\n", - "└───────┴─────────┴──────────────┴────────┴───────┘" - ] - }, - "execution_count": 135, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_n2 = duckdb.sql(\"\"\"\n", - " select\n", - " nextval('uuid') as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " 2 as n,\n", - " from (\n", - " select\n", - " distinct on (prob.cluster, prob.source)\n", - " prob.*\n", - " from\n", - " prob_n2 prob\n", - " anti join clus_n1 cl on\n", - " cl.id = prob.id\n", - " and cl.source = prob.source\n", - " where\n", - " probability > 0.7\n", - " order by\n", - " probability desc,\n", - " id desc\n", - " )\n", - " union\n", - " select\n", - " *\n", - " from\n", - " clus_n1\n", - "\"\"\")\n", - "clus_n2" - ] - }, - { - "cell_type": "code", - "execution_count": 136, - "id": "af2f346e-6600-4a7f-9eba-bc83863757a4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────┬─────────┬──────────────┬────────┬───────┐\n", - "│ uuid │ cluster │ id │ source │ n │\n", - "│ int64 │ int64 │ varchar │ int64 │ int32 │\n", - "├───────┼─────────┼──────────────┼────────┼───────┤\n", - "│ 1 │ 1 │ will_inc_t1 │ 1 │ 0 │\n", - "│ 2 │ 2 │ leo_inc_t1 │ 1 │ 0 │\n", - "│ 3 │ 3 │ sarah_inc_t1 │ 1 │ 0 │\n", - "│ 14 │ 1 │ will_inc_t2 │ 2 │ 1 │\n", - "│ 15 │ 4 │ pedro_inc_t2 │ 2 │ 1 │\n", - "│ 16 │ 2 │ leo_inc_t2 │ 2 │ 1 │\n", - "│ 27 │ 1 │ will_inc_t3 │ 3 │ 2 │\n", - "└───────┴─────────┴──────────────┴────────┴───────┘" - ] - }, - "execution_count": 136, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_n3 = duckdb.sql(\"\"\"\n", - " select\n", - " nextval('uuid') as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " 3 as n,\n", - " from (\n", - " select\n", - " distinct on (prob.cluster, prob.source)\n", - " prob.*\n", - " from\n", - " prob_n3 prob\n", - " anti join clus_n2 on\n", - " clus_n2.id = prob.id\n", - " and clus_n2.source = prob.source\n", - " where\n", - " probability > 0.7\n", - " order by\n", - " probability desc,\n", - " id desc\n", - " )\n", - " union\n", - " select\n", - " *\n", - " from\n", - " clus_n2\n", - "\"\"\")\n", - "clus_n3" - ] - }, - { - "cell_type": "code", - "execution_count": 137, - "id": "5a90a411-db08-4ed6-be5f-8224b729a9cd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 137, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "execution_count": 137, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 137, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "execution_count": 137, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clus_check_l1 = duckdb.sql(\"\"\"\n", - " select\n", - " cluster,\n", - " id,\n", - " source,\n", - " n::int as n\n", - " from\n", - " clus_n2\n", - " order by\n", - " cluster,\n", - " source,\n", - " id,\n", - " n\n", - "\"\"\")\n", - "clus_check_l2 = duckdb.sql(\"\"\"\n", - " select\n", - " cluster,\n", - " id,\n", - " source,\n", - " n::int as n\n", - " from\n", - " clus_n3\n", - " order by\n", - " cluster,\n", - " source,\n", - " id,\n", - " n\n", - "\"\"\")\n", - "clus_check_r = duckdb.sql(\"\"\"\n", - " select\n", - " cluster,\n", - " id,\n", - " source,\n", - " n_seq::int as n\n", - " from\n", - " clus\n", - " order by\n", - " cluster,\n", - " source,\n", - " id,\n", - " n_par\n", - "\"\"\")\n", - "clus_check_l1.df().equals(clus_check_r.df())\n", - "clus_check_l1.df().compare(clus_check_r.df())\n", - "clus_check_l2.df().equals(clus_check_r.df())\n", - "clus_check_l2.df().compare(clus_check_r.df())" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/engineering/WL_prob2clus_2.ipynb b/notebooks/engineering/WL_prob2clus_2.ipynb deleted file mode 100644 index 1ac08fd..0000000 --- a/notebooks/engineering/WL_prob2clus_2.ipynb +++ /dev/null @@ -1,867 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "4f6f17b1-2052-4322-acfe-dd271846311f", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame, display\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "markdown", - "id": "3706608b-1690-48bb-b8aa-61ce1fb96a7d", - "metadata": {}, - "source": [ - "# Probabilities to cluster algorithm\n", - "\n", - "A notebook to hash out this algorithm and check it works.\n", - "\n", - "Will hopefully turn into a unit test too, hence CVSs into version control." - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "id": "2d75838e-9781-463e-ac09-3a9097fb630b", - "metadata": {}, - "outputs": [], - "source": [ - "from cmf import locations as loc\n", - "from cmf.data import utils as du\n", - "\n", - "import pandas as pd\n", - "import duckdb\n", - "from pathlib import Path" - ] - }, - { - "cell_type": "markdown", - "id": "2b32e1f2-199f-4d1a-b5b1-4b6d03fe6c8e", - "metadata": {}, - "source": [ - "Tests:\n", - "\n", - "* unambig_t2_e4\n", - "* unambig_t3_e2\n", - "* masked_t3_e3\n", - "* val_masked_t3_e2\n", - "* val_unambig_t3_e2" - ] - }, - { - "cell_type": "markdown", - "id": "2b4ae085-7b5d-4c76-b791-e06b20eed341", - "metadata": {}, - "source": [ - "## Helper functions" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "63668afd-da8e-4e0a-8b3b-7bac7311d2e2", - "metadata": {}, - "outputs": [], - "source": [ - "def validate_against_answer(my_cluster, validated_cluster, n_type = 'par'):\n", - " clus_check_l = duckdb.sql(\"\"\"\n", - " select\n", - " cluster,\n", - " id,\n", - " source,\n", - " n::int as n\n", - " from\n", - " my_cluster\n", - " order by\n", - " cluster,\n", - " source,\n", - " id,\n", - " n\n", - " \"\"\")\n", - " clus_check_r = duckdb.sql(f\"\"\"\n", - " select\n", - " cluster,\n", - " id,\n", - " source,\n", - " n_{n_type}::int as n\n", - " from\n", - " validated_cluster\n", - " order by\n", - " cluster,\n", - " source,\n", - " id,\n", - " n_{n_type}\n", - " \"\"\")\n", - " return clus_check_l.df().equals(clus_check_r.df())" - ] - }, - { - "cell_type": "markdown", - "id": "1a8bf828-ffde-415b-a052-03635eadcea6", - "metadata": {}, - "source": [ - "## Formalise algorithm" - ] - }, - { - "cell_type": "markdown", - "id": "787a74dd-9b1c-43b6-8080-b1036bf9459e", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "### DuckDB version" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "958be476-0014-45d7-8e76-80009512521f", - "metadata": {}, - "outputs": [], - "source": [ - "def resolve_clusters(prob, val, clus, n):\n", - " # The clusters are initialised outside the function, as in the\n", - " # real repo\n", - " # The \"where\" in validation is to prevent data leaking\n", - " # when we do this in steps. We only resolve against the \n", - " # sources in prob\n", - " clus_init = duckdb.sql(f\"\"\"\n", - " select\n", - " uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " n,\n", - " from\n", - " clus\n", - " union\n", - " select\n", - " nextval('uuid') as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " {n} as n,\n", - " from\n", - " val\n", - " where \n", - " source in (\n", - " select\n", - " source\n", - " from\n", - " prob\n", - " )\n", - " \"\"\")\n", - " # Create a temporary probabilities table so we \n", - " # can delete stuff\n", - " # Create a temporary clusters table so duckDB can\n", - " # insert stuff. Wouldn't be needed in a database\n", - " duckdb.sql(\"\"\"\n", - " drop table if exists probabilities_temp;\n", - " drop table if exists clusters_temp;\n", - " \n", - " create temp table probabilities_temp as\n", - " select\n", - " uuid,\n", - " link_type,\n", - " cluster,\n", - " id,\n", - " source,\n", - " probability\n", - " from\n", - " prob prob\n", - " where \n", - " prob.probability >= 0.7\n", - " and cluster != 0\n", - " order by\n", - " probability desc;\n", - " \n", - " create temp table clusters_temp as\n", - " select\n", - " uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " n,\n", - " from\n", - " clus_init;\n", - " \"\"\")\n", - " # Find what we need to insert by comparing clusters_temp and\n", - " # probabilities_temp\n", - " # Insert it into clusters_temp\n", - " # Delete it from probabilities_temp\n", - " # Keep going until there's nothing to find\n", - " data_to_insert = True\n", - " while data_to_insert:\n", - " to_insert = duckdb.sql(f\"\"\"\n", - " select\n", - " distinct on (agg.id, agg.source)\n", - " nextval('uuid') as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " {n} as n,\n", - " from (\n", - " select\n", - " distinct on (prob.cluster, prob.source)\n", - " prob.*\n", - " from\n", - " probabilities_temp prob\n", - " where \n", - " not exists (\n", - " select\n", - " id,\n", - " source\n", - " from\n", - " clusters_temp clus\n", - " where\n", - " clus.id = prob.id\n", - " and clus.source = prob.source\n", - " )\n", - " or not exists (\n", - " select\n", - " cluster,\n", - " source\n", - " from\n", - " clusters_temp clus\n", - " where\n", - " clus.cluster = prob.cluster\n", - " and clus.source = prob.source\n", - " )\n", - " order by\n", - " probability desc\n", - " ) agg;\n", - " \"\"\")\n", - " \n", - " if len(to_insert.df().index) == 0:\n", - " data_to_insert = False\n", - " break\n", - " \n", - " duckdb.sql(\"\"\"\n", - " insert into clusters_temp \n", - " select\n", - " uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " n,\n", - " from\n", - " to_insert;\n", - " \"\"\")\n", - "\n", - " duckdb.sql(\"\"\"\n", - " delete from probabilities_temp prob_temp\n", - " where exists (\n", - " select \n", - " cl.cluster,\n", - " cl.id,\n", - " cl.source\n", - " from \n", - " to_insert cl\n", - " where\n", - " cl.id = prob_temp.id\n", - " and cl.cluster = prob_temp.cluster\n", - " and cl.source = prob_temp.source\n", - " );\n", - " \"\"\")\n", - "\n", - " result = duckdb.sql(\"\"\"\n", - " select\n", - " uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " n,\n", - " from\n", - " clusters_temp;\n", - " \"\"\")\n", - "\n", - " return result.df()" - ] - }, - { - "cell_type": "markdown", - "id": "d9f8e39d-f40e-4aae-9f61-baa536f5d2ab", - "metadata": {}, - "source": [ - "### Postgres version" - ] - }, - { - "cell_type": "code", - "execution_count": 233, - "id": "12ff0322-41c3-4df0-8cf0-6d706168361f", - "metadata": {}, - "outputs": [], - "source": [ - "def resolve_clusters_pg(prob, val, clus, n, threshold: float = 0.7):\n", - " # This time we're reading and writing stuff from the DB\n", - " # Assume prob, val and clus are all table names\n", - " # (or possibly objects we get those names from)\n", - " clusters_temp = \"clusters_temp\"\n", - " probabilities_temp = \"probabilities_temp\"\n", - " to_insert_temp = \"to_insert_temp\"\n", - " \n", - " # Create a temporary clusters table to work with \n", - " # until the algorithm has finished, for safety\n", - " du.query_nonreturn(f\"\"\"\n", - " drop table if exists {clusters_temp};\n", - " create temporary table {clusters_temp} as\n", - " select\n", - " uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " n\n", - " from\n", - " {clus}\n", - " union\n", - " select\n", - " gen_random_uuid() as uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " {n} as n\n", - " from\n", - " {val}\n", - " where \n", - " source in (\n", - " select\n", - " source\n", - " from\n", - " {prob}\n", - " );\n", - " \"\"\")\n", - " # Create a temporary probabilities table so we \n", - " # can delete stuff\n", - " du.query_nonreturn(f\"\"\"\n", - " drop table if exists {probabilities_temp};\n", - " create temporary table {probabilities_temp} as\n", - " select\n", - " uuid,\n", - " link_type,\n", - " cluster,\n", - " id,\n", - " source,\n", - " probability\n", - " from\n", - " {prob} prob\n", - " where \n", - " prob.probability >= {threshold}\n", - " order by\n", - " probability desc;\n", - " \"\"\")\n", - " # Find what we need to insert by comparing clusters_temp and\n", - " # probabilities_temp\n", - " # Insert it into clusters_temp\n", - " # Delete it from probabilities_temp\n", - " # Keep going until there's nothing to find\n", - " data_to_insert = True\n", - " while data_to_insert:\n", - " du.query_nonreturn(f\"\"\"\n", - " drop table if exists {to_insert_temp};\n", - " create temporary table {to_insert_temp} as\n", - " select\n", - " \tdistinct on (id_rank.id, id_rank.source)\n", - " \tgen_random_uuid() as uuid,\n", - " \tid_rank.cluster,\n", - " \tid_rank.id,\n", - " \tid_rank.source,\n", - " \t{n} as n\n", - " from (\n", - " \tselect\n", - " \t\tdistinct on (clus_rank.cluster, clus_rank.source)\n", - " \t\tclus_rank.*,\n", - " \t\trank() over (\n", - " \t\t\tpartition by\n", - " \t\t\t\tclus_rank.id,\n", - " \t\t\t\tclus_rank.source\n", - " \t\t\torder by \n", - " \t\t\t\tclus_rank.probability desc\n", - " \t\t) as id_rank\n", - " \tfrom (\n", - " \t\tselect\n", - " \t\t\tprob.*,\n", - " \t\t\trank() over(\n", - " \t\t\t\tpartition by \n", - " \t\t\t\t\tprob.cluster, \n", - " \t\t\t\t\tprob.source\n", - " \t\t\t\torder by \n", - " \t\t\t\t\tprob.probability desc\n", - " \t\t\t) as clus_rank\n", - " \t\tfrom\n", - " \t\t\t{probabilities_temp} prob\n", - " \t) clus_rank\n", - " \twhere \n", - " \t\tclus_rank.clus_rank = 1\n", - " \t\tand (\n", - " \t\t\tnot exists (\n", - " \t\t\t\tselect\n", - " \t\t\t\t\tid,\n", - " \t\t\t\t\tsource\n", - " \t\t\t\tfrom\n", - " \t\t\t\t\t{clusters_temp} clus\n", - " \t\t\t\twhere\n", - " \t\t\t\t\tclus.id = clus_rank.id\n", - " \t\t\t\t\tand clus.source = clus_rank.source\n", - " \t\t\t)\n", - " \t\t\tor not exists (\n", - " \t\t\t\tselect\n", - " \t\t\t\t\tcluster,\n", - " \t\t\t\t\tsource\n", - " \t\t\t\tfrom\n", - " \t\t\t\t\t{clusters_temp} clus\n", - " \t\t\t\twhere\n", - " \t\t\t\t\tclus.cluster = clus_rank.cluster\n", - " \t\t\t\t\tand clus.source = clus_rank.source\n", - " \t\t\t)\n", - " \t\t)\n", - " \torder by\n", - " \t\tclus_rank.cluster, \n", - " \t\tclus_rank.source\n", - " ) id_rank\n", - " where\n", - " \tid_rank.id_rank = 1\n", - " order by\n", - " \tid_rank.id, \n", - " \tid_rank.source;\n", - " \"\"\")\n", - " \n", - " if du.check_table_empty(f\"{to_insert_temp}\"):\n", - " data_to_insert = False\n", - " break\n", - "\n", - " du.query_nonreturn(f\"\"\"\n", - " insert into {clusters_temp}\n", - " select\n", - " uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " n\n", - " from\n", - " {to_insert_temp};\n", - " \"\"\")\n", - "\n", - " du.query_nonreturn(f\"\"\"\n", - " delete from {probabilities_temp} prob_temp\n", - " where exists (\n", - " select \n", - " cl.cluster,\n", - " cl.id,\n", - " cl.source\n", - " from \n", - " {to_insert_temp} cl\n", - " where\n", - " (\n", - " cl.id = prob_temp.id\n", - " and cl.source = prob_temp.source\n", - " )\n", - " or (\n", - " cl.cluster = prob_temp.cluster\n", - " and cl.source = prob_temp.source\n", - " )\n", - " );\n", - " \"\"\")\n", - "\n", - " # New in this version -- add new items to clusters from temp\n", - " # where the cluster match UUID is new\n", - "\n", - " du.query_nonreturn(f\"\"\"\n", - " insert into {clus}\n", - " select\n", - " uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " n\n", - " from\n", - " {clusters_temp} ct\n", - " where not exists (\n", - " select\n", - " uuid,\n", - " cluster,\n", - " id,\n", - " source,\n", - " n\n", - " from\n", - " {clus} c\n", - " where\n", - " c.uuid = ct.uuid\n", - " );\n", - " \"\"\")\n", - "\n", - " # tidy up\n", - " \n", - " du.query_nonreturn(f\"\"\"\n", - " drop table if exists {clusters_temp};\n", - " drop table if exists {probabilities_temp};\n", - " drop table if exists {to_insert_temp};\n", - " \"\"\")\n", - " " - ] - }, - { - "cell_type": "markdown", - "id": "316a8fdf-8559-4ed9-a4ee-1886a6156340", - "metadata": {}, - "source": [ - "## Testing" - ] - }, - { - "cell_type": "code", - "execution_count": 234, - "id": "82dfb797-80a4-432f-9c05-cbfb63ce0700", - "metadata": {}, - "outputs": [], - "source": [ - "tests = [\n", - " \"unambig_t2_e4\",\n", - " \"unambig_t3_e2\",\n", - " \"masked_t3_e3\",\n", - " \"val_masked_t3_e2\",\n", - " \"val_unambig_t3_e2\",\n", - "]" - ] - }, - { - "cell_type": "markdown", - "id": "9d950192-97ae-4838-994b-c5311c1e4bd7", - "metadata": {}, - "source": [ - "### DuckDB version\n", - "\n", - "#### Parallel tests" - ] - }, - { - "cell_type": "code", - "execution_count": 161, - "id": "d5fa2a94-0fb9-47c7-b3a6-5a08f937b799", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "unambig_t2_e4 passed: True\n", - "unambig_t3_e2 passed: True\n", - "masked_t3_e3 passed: True\n", - "val_masked_t3_e2 passed: True\n", - "val_unambig_t3_e2 passed: True\n" - ] - } - ], - "source": [ - "for test in tests:\n", - " prob, clus, val = du.load_test_data(Path(loc.PROJECT_DIR, \"test\", test))\n", - " clus_init = duckdb.sql(\"\"\"\n", - " drop sequence if exists uuid;\n", - " drop sequence if exists cluster;\n", - " create sequence uuid start 1;\n", - " create sequence cluster start 1;\n", - " select\n", - " nextval('uuid') as uuid,\n", - " nextval('cluster') as cluster,\n", - " id,\n", - " source,\n", - " 0 as n,\n", - " from\n", - " prob\n", - " where\n", - " cluster = 0\n", - " \"\"\")\n", - " my_answer = resolve_clusters(prob, val, clus_init, 1)\n", - " passed = validate_against_answer(my_answer, clus, n_type = 'par')\n", - " print(f\"{test} passed: {passed}\")" - ] - }, - { - "cell_type": "markdown", - "id": "9208e4ab-147f-463a-80d9-9e39fea86616", - "metadata": {}, - "source": [ - "#### Sequential tests" - ] - }, - { - "cell_type": "code", - "execution_count": 160, - "id": "f22e500d-efce-4161-a2cc-7189fc2f2133", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "unambig_t2_e4 passed: True\n", - "unambig_t3_e2 passed: True\n", - "masked_t3_e3 passed: True\n", - "val_masked_t3_e2 passed: True\n", - "val_unambig_t3_e2 passed: True\n" - ] - } - ], - "source": [ - "for test in tests:\n", - " prob, clus, val = du.load_test_data(Path(loc.PROJECT_DIR, \"test\", test))\n", - " clus_init = duckdb.sql(\"\"\"\n", - " drop sequence if exists uuid;\n", - " drop sequence if exists cluster;\n", - " create sequence uuid start 1;\n", - " create sequence cluster start 1;\n", - " select\n", - " nextval('uuid') as uuid,\n", - " nextval('cluster') as cluster,\n", - " id,\n", - " source,\n", - " 0 as n,\n", - " from\n", - " prob\n", - " where\n", - " cluster = 0\n", - " \"\"\")\n", - " prob_sequence_dict = {i - 1: g for i, g in prob.groupby('source')}\n", - " val_sequence_dict = {i - 1: g for i, g in val.groupby('source')}\n", - " for i in range(len(prob_sequence_dict)):\n", - " prob_n = prob_sequence_dict[i]\n", - " try:\n", - " val_n = val_sequence_dict[i]\n", - " except KeyError:\n", - " val_n = val.iloc[0:0]\n", - " clus_init = resolve_clusters(prob_n, val_n, clus_init, i)\n", - " my_answer = clus_init\n", - " passed = validate_against_answer(my_answer, clus, n_type = 'seq')\n", - " print(f\"{test} passed: {passed}\")" - ] - }, - { - "cell_type": "markdown", - "id": "898e5484-2abf-40a6-aa86-4858fcdd7133", - "metadata": {}, - "source": [ - "### Postgres version\n", - "\n", - "#### Parallel tests" - ] - }, - { - "cell_type": "code", - "execution_count": 236, - "id": "dceb28ca-a247-4e06-b421-de7e3edccbc6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "unambig_t2_e4 passed: True\n", - "unambig_t3_e2 passed: True\n", - "masked_t3_e3 passed: True\n", - "val_masked_t3_e2 passed: True\n", - "val_unambig_t3_e2 passed: True\n" - ] - } - ], - "source": [ - "for test in tests:\n", - " prob, clus, val = du.load_test_data(Path(loc.PROJECT_DIR, \"test\", test))\n", - " du.query_nonreturn(f\"\"\"\n", - " drop table if exists _user_eaf4fd9a.temp_prob;\n", - " create table _user_eaf4fd9a.temp_prob (\n", - " uuid bigint,\n", - " link_type text,\n", - " cluster bigint,\n", - " id text,\n", - " source bigint,\n", - " probability double precision\n", - " )\n", - " \"\"\")\n", - " du.data_workspace_write(\"_user_eaf4fd9a\", \"temp_prob\", prob, if_exists=\"append\")\n", - " du.query_nonreturn(f\"\"\"\n", - " drop table if exists _user_eaf4fd9a.temp_val;\n", - " create table _user_eaf4fd9a.temp_val (\n", - " uuid bigint,\n", - " id text,\n", - " cluster bigint,\n", - " source bigint,\n", - " \"user\" text,\n", - " match bool \n", - " )\n", - " \"\"\")\n", - " du.data_workspace_write(\"_user_eaf4fd9a\", \"temp_val\", val, if_exists=\"append\")\n", - " du.query_nonreturn(f\"\"\"\n", - " drop table if exists _user_eaf4fd9a.temp_clus;\n", - " create table _user_eaf4fd9a.temp_clus as\n", - " select\n", - " gen_random_uuid() as uuid,\n", - " row_number() over () as cluster,\n", - " init.id,\n", - " init.source,\n", - " 0 as n\n", - " from (\n", - " select \n", - " * \n", - " from \n", - " _user_eaf4fd9a.temp_prob\n", - " where\n", - " source = 1\n", - " ) init\n", - " \"\"\")\n", - " resolve_clusters_pg(\n", - " \"_user_eaf4fd9a.temp_prob\", \n", - " \"_user_eaf4fd9a.temp_val\", \n", - " \"_user_eaf4fd9a.temp_clus\",\n", - " 1,\n", - " 0.7\n", - " )\n", - " passed = validate_against_answer(\n", - " du.query(\"select * from _user_eaf4fd9a.temp_clus\"), \n", - " clus, \n", - " n_type = 'par'\n", - " )\n", - " du.query_nonreturn(\"\"\"\n", - " drop table if exists _user_eaf4fd9a.temp_prob;\n", - " drop table if exists _user_eaf4fd9a.temp_clus;\n", - " drop table if exists _user_eaf4fd9a.temp_val;\n", - " \"\"\")\n", - " print(f\"{test} passed: {passed}\")" - ] - }, - { - "cell_type": "markdown", - "id": "b1f539bb-64ed-41be-bfbe-a0c42038eade", - "metadata": {}, - "source": [ - "#### Sequential tests" - ] - }, - { - "cell_type": "code", - "execution_count": 231, - "id": "c896b9c9-e2d0-4e61-b2ca-f01804e18899", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "unambig_t2_e4 passed: True\n", - "unambig_t3_e2 passed: True\n", - "masked_t3_e3 passed: True\n", - "val_masked_t3_e2 passed: True\n", - "val_unambig_t3_e2 passed: True\n" - ] - } - ], - "source": [ - "for test in tests:\n", - " prob, clus, val = du.load_test_data(Path(loc.PROJECT_DIR, \"test\", test))\n", - " prob_sequence_dict = {i - 1: g for i, g in prob.groupby('source')}\n", - " val_sequence_dict = {i - 1: g for i, g in val.groupby('source')}\n", - "\n", - " # Initialise clusters -- involves some messy work with the prob table but nvm\n", - " du.query_nonreturn(\"drop table if exists _user_eaf4fd9a.temp_prob;\")\n", - " du.data_workspace_write(\"_user_eaf4fd9a\", \"temp_prob\", prob, if_exists=\"append\")\n", - " du.query_nonreturn(\"\"\"\n", - " drop table if exists _user_eaf4fd9a.temp_clus;\n", - " create table _user_eaf4fd9a.temp_clus as\n", - " select\n", - " gen_random_uuid() as uuid,\n", - " row_number() over () as cluster,\n", - " init.id,\n", - " init.source,\n", - " 0 as n\n", - " from (\n", - " select \n", - " * \n", - " from \n", - " _user_eaf4fd9a.temp_prob\n", - " where\n", - " source = 1\n", - " ) init\n", - " \"\"\")\n", - " \n", - " for i in range(len(prob_sequence_dict)):\n", - " # Create probability table at step n\n", - " prob_n = prob_sequence_dict[i]\n", - " du.query_nonreturn(\"\"\"\n", - " drop table if exists _user_eaf4fd9a.temp_prob;\n", - " create table _user_eaf4fd9a.temp_prob (\n", - " uuid bigint,\n", - " link_type text,\n", - " cluster bigint,\n", - " id text,\n", - " source bigint,\n", - " probability double precision\n", - " )\n", - " \"\"\")\n", - " du.data_workspace_write(\"_user_eaf4fd9a\", \"temp_prob\", prob_n, if_exists=\"append\")\n", - "\n", - " # Create validation table at step n\n", - " try:\n", - " val_n = val_sequence_dict[i]\n", - " except KeyError:\n", - " val_n = val.iloc[0:0]\n", - " du.query_nonreturn(\"\"\"\n", - " drop table if exists _user_eaf4fd9a.temp_val;\n", - " create table _user_eaf4fd9a.temp_val (\n", - " uuid bigint,\n", - " id text,\n", - " cluster bigint,\n", - " source bigint,\n", - " \"user\" text,\n", - " match bool \n", - " )\n", - " \"\"\")\n", - " du.data_workspace_write(\"_user_eaf4fd9a\", \"temp_val\", val_n, if_exists=\"append\")\n", - "\n", - " # Resolve clusters\n", - " resolve_clusters_pg(\n", - " \"_user_eaf4fd9a.temp_prob\", \n", - " \"_user_eaf4fd9a.temp_val\", \n", - " \"_user_eaf4fd9a.temp_clus\",\n", - " i,\n", - " 0.7\n", - " )\n", - " \n", - " my_answer = clus_init\n", - " passed = validate_against_answer(\n", - " du.query(\"select * from _user_eaf4fd9a.temp_clus\"), \n", - " clus, \n", - " n_type = 'seq'\n", - " )\n", - " print(f\"{test} passed: {passed}\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/engineering/WL_profilewrite.ipynb b/notebooks/engineering/WL_profilewrite.ipynb deleted file mode 100644 index 49f51c3..0000000 --- a/notebooks/engineering/WL_profilewrite.ipynb +++ /dev/null @@ -1,2248 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "de801863-e546-47e1-9652-b5e304b229a6", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "b057f70b-059e-4871-83d2-f5b36b510d75", - "metadata": {}, - "outputs": [], - "source": [ - "import cmf\n", - "from cmf import clean\n", - "from cmf.clean import steps\n", - "from cmf.data.results import ClusterResults, ProbabilityResults\n", - "from cmf.data.utils import sqa_profiled\n", - "from cmf.dedupers import NaiveDeduper\n", - "from cmf.helpers import cleaner, cleaners, selector\n", - "\n", - "from pandas import DataFrame\n", - "import logging\n", - "\n", - "db_logger = logging.getLogger(\"sqlalchemy.engine\")\n", - "db_logger.setLevel(logging.INFO)\n", - "db_logger_fh = logging.FileHandler(\"logging/sqlalchemy.log\")\n", - "db_logger_fh.setLevel(logging.INFO)\n", - "db_logger.addHandler(db_logger_fh)\n", - "\n", - "logic_logger = logging.getLogger(\"cmf_logic\")\n", - "logic_logger.setLevel(logging.INFO)\n", - "logic_logger_fh = logging.FileHandler(\"logging/cmf.log\")\n", - "logic_logger_fh.setLevel(logging.INFO)\n", - "logic_logger.addHandler(logic_logger_fh)" - ] - }, - { - "cell_type": "markdown", - "id": "1644eddc-62d3-403d-aac7-cd57b9a0680f", - "metadata": {}, - "source": [ - "## First model\n", - "\n", - "Data already in DB." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "3c730dea-3f63-475a-922a-446513a7c612", - "metadata": {}, - "outputs": [], - "source": [ - "_NAME = \"naive_export_wins_v1\"\n", - "_SOURCE = \"dbt.export_wins__wins_dataset\"" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "ce812d3d-16b5-48bc-9caa-660ccb1b5bc2", - "metadata": {}, - "outputs": [], - "source": [ - "ew_selector = selector(\n", - " table=_SOURCE,\n", - " fields=[\"company_name\", \"cdms_reference\"],\n", - ")\n", - "\n", - "ew_raw = cmf.query(selector=ew_selector, return_type=\"pandas\", limit=1_000)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "ce49c55c-a2d4-429b-a945-ebb526a2b48e", - "metadata": {}, - "outputs": [], - "source": [ - "clean_generic_id = clean.cleaning_function(\n", - " steps.punctuation_to_spaces, steps.to_upper, steps.remove_whitespace\n", - ")\n", - "\n", - "clean_ew = cleaners(\n", - " cleaner(\n", - " clean.company_name, {\"column\": \"dbt_export_wins__wins_dataset_company_name\"}\n", - " ),\n", - " cleaner(\n", - " clean_generic_id, {\"column\": \"dbt_export_wins__wins_dataset_cdms_reference\"}\n", - " ),\n", - ")\n", - "\n", - "ew_clean = cmf.process(ew_raw, clean_ew)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "222d942d-8c69-44ad-af17-9f01f390fa6e", - "metadata": {}, - "outputs": [], - "source": [ - "ew_naive_deduper = cmf.make_deduper(\n", - " dedupe_run_name=_NAME,\n", - " description=\"Basic cleaning of name and CDMS column.\",\n", - " deduper=NaiveDeduper,\n", - " deduper_settings={\n", - " \"id\": \"data_sha1\",\n", - " \"unique_fields\": [\n", - " \"dbt_export_wins__wins_dataset_company_name\",\n", - " \"dbt_export_wins__wins_dataset_cdms_reference\",\n", - " ],\n", - " },\n", - " data=ew_clean,\n", - " data_source=_SOURCE,\n", - ")\n", - "\n", - "ew_deduped = ew_naive_deduper()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "486ef94a-c58a-4581-9fc7-11eab506d7e7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
modelleftleft_idrightright_idprobability
0naive_export_wins_v1dbt.export_wins__wins_datasetb'\\x04\\xa3}_\\xe6\\xdb\\xa0mK\\x98\\xf6\\x8b\\xba\\xaa...dbt.export_wins__wins_datasetb'\\xb0\\xfc\\x01\\x9b \\xc0tx\\xcd\\xe4g\\xc9\\x82\\x86...1
1naive_export_wins_v1dbt.export_wins__wins_datasetb'}\\xcd\\xb5\\xbbt\\xb2d\\xae>D\\xe8\\x12\\x02@i\\xd2\\...dbt.export_wins__wins_datasetb'\\xee\\xfd\\xe9\\xb3\\xad^XA\\xf3\\xd3\\xd6l\\xcfb6{\\...1
2naive_export_wins_v1dbt.export_wins__wins_datasetb'\\xb4k:#\\\\@\\x7f~v\\xac\\xdds\\xec\\xb3/\\xcd\\xd4.\\...dbt.export_wins__wins_datasetb'M6\\x12+H\\x808\\xc7O*\\xec{\\xa1o\\xb1#\\x19=\\x16:'1
\n", - "
" - ], - "text/plain": [ - " model left \\\n", - "0 naive_export_wins_v1 dbt.export_wins__wins_dataset \n", - "1 naive_export_wins_v1 dbt.export_wins__wins_dataset \n", - "2 naive_export_wins_v1 dbt.export_wins__wins_dataset \n", - "\n", - " left_id \\\n", - "0 b'\\x04\\xa3}_\\xe6\\xdb\\xa0mK\\x98\\xf6\\x8b\\xba\\xaa... \n", - "1 b'}\\xcd\\xb5\\xbbt\\xb2d\\xae>D\\xe8\\x12\\x02@i\\xd2\\... \n", - "2 b'\\xb4k:#\\\\@\\x7f~v\\xac\\xdds\\xec\\xb3/\\xcd\\xd4.\\... \n", - "\n", - " right \\\n", - "0 dbt.export_wins__wins_dataset \n", - "1 dbt.export_wins__wins_dataset \n", - "2 dbt.export_wins__wins_dataset \n", - "\n", - " right_id probability \n", - "0 b'\\xb0\\xfc\\x01\\x9b \\xc0tx\\xcd\\xe4g\\xc9\\x82\\x86... 1 \n", - "1 b'\\xee\\xfd\\xe9\\xb3\\xad^XA\\xf3\\xd3\\xd6l\\xcfb6{\\... 1 \n", - "2 b'M6\\x12+H\\x808\\xc7O*\\xec{\\xa1o\\xb1#\\x19=\\x16:' 1 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 1721 entries, 0 to 1720\n", - "Data columns (total 6 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 model 1721 non-null string[pyarrow]\n", - " 1 left 1721 non-null string[pyarrow]\n", - " 2 left_id 1721 non-null object \n", - " 3 right 1721 non-null string[pyarrow]\n", - " 4 right_id 1721 non-null object \n", - " 5 probability 1721 non-null int32[pyarrow] \n", - "dtypes: int32[pyarrow](1), object(2), string[pyarrow](3)\n", - "memory usage: 185.2+ KB\n" - ] - } - ], - "source": [ - "ew_deduped.to_df().head(3)\n", - "ew_deduped.to_df().info()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "b1873d2e-745f-4e99-a19e-2b244f695ef6", - "metadata": {}, - "outputs": [], - "source": [ - "ew_clusters = cmf.to_clusters(\n", - " ew_clean, \n", - " results=ew_deduped, \n", - " key=\"data_sha1\", \n", - " threshold=1\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "f90f7012-1706-4ca5-999c-6f70a4f857e7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parentchild
0b'\\r!\\xe9\\xe4q\\xc8\\x11\\xe6\\x96!\\xe1O\\x1b\\xf0\\x...b'\\x04\\xa3}_\\xe6\\xdb\\xa0mK\\x98\\xf6\\x8b\\xba\\xaa...
1b'\\r!\\xe9\\xe4q\\xc8\\x11\\xe6\\x96!\\xe1O\\x1b\\xf0\\x...b'\\xb0\\xfc\\x01\\x9b \\xc0tx\\xcd\\xe4g\\xc9\\x82\\x86...
2b'\\xde\\xd9>\\xf4!\\x1e\\xe7t\\xa1\\x90\\x05\\x9fS\\x91...b'}\\xcd\\xb5\\xbbt\\xb2d\\xae>D\\xe8\\x12\\x02@i\\xd2\\...
\n", - "
" - ], - "text/plain": [ - " parent \\\n", - "0 b'\\r!\\xe9\\xe4q\\xc8\\x11\\xe6\\x96!\\xe1O\\x1b\\xf0\\x... \n", - "1 b'\\r!\\xe9\\xe4q\\xc8\\x11\\xe6\\x96!\\xe1O\\x1b\\xf0\\x... \n", - "2 b'\\xde\\xd9>\\xf4!\\x1e\\xe7t\\xa1\\x90\\x05\\x9fS\\x91... \n", - "\n", - " child \n", - "0 b'\\x04\\xa3}_\\xe6\\xdb\\xa0mK\\x98\\xf6\\x8b\\xba\\xaa... \n", - "1 b'\\xb0\\xfc\\x01\\x9b \\xc0tx\\xcd\\xe4g\\xc9\\x82\\x86... \n", - "2 b'}\\xcd\\xb5\\xbbt\\xb2d\\xae>D\\xe8\\x12\\x02@i\\xd2\\... " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Index: 1000 entries, 0 to 495\n", - "Data columns (total 2 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 parent 1000 non-null object\n", - " 1 child 1000 non-null object\n", - "dtypes: object(2)\n", - "memory usage: 23.4+ KB\n" - ] - } - ], - "source": [ - "ew_clusters.to_df().head(3)\n", - "ew_clusters.to_df().info()" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "6fcc85cd-3225-4bb8-86fd-42c6db3f0983", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[18], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m sqa_profiled():\n\u001b[0;32m----> 2\u001b[0m \u001b[43mew_deduped\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_cmf\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/company-matching/cmf/data/results.py:139\u001b[0m, in \u001b[0;36mResultsBaseDataclass.to_cmf\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 138\u001b[0m logic_logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmetadata\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] Writing deduplication data\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 139\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_deduper_to_cmf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 141\u001b[0m \u001b[38;5;66;03m# Linker\u001b[39;00m\n\u001b[1;32m 142\u001b[0m \u001b[38;5;66;03m# Write model\u001b[39;00m\n\u001b[1;32m 143\u001b[0m logic_logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmetadata\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] Registering model\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/company-matching/cmf/data/results.py:336\u001b[0m, in \u001b[0;36mProbabilityResults._deduper_to_cmf\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 319\u001b[0m to_insert \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 320\u001b[0m session\u001b[38;5;241m.\u001b[39mquery(Dedupes)\n\u001b[1;32m 321\u001b[0m \u001b[38;5;241m.\u001b[39mjoin(sha1_dedupe_cte, sha1_dedupe_cte\u001b[38;5;241m.\u001b[39mc\u001b[38;5;241m.\u001b[39msha1 \u001b[38;5;241m==\u001b[39m Dedupes\u001b[38;5;241m.\u001b[39msha1)\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 331\u001b[0m \u001b[38;5;241m.\u001b[39mall()\n\u001b[1;32m 332\u001b[0m )\n\u001b[1;32m 334\u001b[0m logic_logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[TEST] got nodes \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mlen\u001b[39m(to_insert))\n\u001b[0;32m--> 336\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mproposes_dedupes\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclear\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 338\u001b[0m proposes_dedupes_dict \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m()\n\u001b[1;32m 339\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m dd, r \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(to_insert, probabilities_to_add):\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/ext/associationproxy.py:1692\u001b[0m, in \u001b[0;36m_AssociationDict.clear\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1691\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mclear\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1692\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcol\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclear\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:1291\u001b[0m, in \u001b[0;36m_dict_decorators..clear..clear\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1289\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mclear\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 1290\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m-> 1291\u001b[0m \u001b[43m__del\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1292\u001b[0m fn(\u001b[38;5;28mself\u001b[39m)\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:1111\u001b[0m, in \u001b[0;36m__del\u001b[0;34m(collection, item, _sa_initiator, key)\u001b[0m\n\u001b[1;32m 1109\u001b[0m executor \u001b[38;5;241m=\u001b[39m collection\u001b[38;5;241m.\u001b[39m_sa_adapter\n\u001b[1;32m 1110\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m executor:\n\u001b[0;32m-> 1111\u001b[0m \u001b[43mexecutor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfire_remove_event\u001b[49m\u001b[43m(\u001b[49m\u001b[43mitem\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_sa_initiator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:728\u001b[0m, in \u001b[0;36mCollectionAdapter.fire_remove_event\u001b[0;34m(self, item, initiator, key)\u001b[0m\n\u001b[1;32m 725\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mempty:\n\u001b[1;32m 726\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reset_empty()\n\u001b[0;32m--> 728\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfire_remove_event\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 729\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mowner_state\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mowner_state\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mitem\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minitiator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\n\u001b[1;32m 730\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1807\u001b[0m, in \u001b[0;36mCollectionAttributeImpl.fire_remove_event\u001b[0;34m(self, state, dict_, value, initiator, key)\u001b[0m\n\u001b[1;32m 1804\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msethasparent(instance_state(value), state, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 1806\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m fn \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdispatch\u001b[38;5;241m.\u001b[39mremove:\n\u001b[0;32m-> 1807\u001b[0m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minitiator\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_remove_token\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1809\u001b[0m state\u001b[38;5;241m.\u001b[39m_modified_event(dict_, \u001b[38;5;28mself\u001b[39m, NO_VALUE, \u001b[38;5;28;01mTrue\u001b[39;00m)\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2270\u001b[0m, in \u001b[0;36mbackref_listeners..emit_backref_from_collection_remove_event\u001b[0;34m(state, child, initiator, **kw)\u001b[0m\n\u001b[1;32m 2264\u001b[0m check_for_dupes_on_remove \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 2266\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 2267\u001b[0m initiator \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m check_remove_token\n\u001b[1;32m 2268\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m initiator \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m check_replace_token\n\u001b[1;32m 2269\u001b[0m ):\n\u001b[0;32m-> 2270\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m check_for_dupes_on_remove \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[43mutil\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhas_dupes\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2271\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# when this event is called, the item is usually\u001b[39;49;00m\n\u001b[1;32m 2272\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# present in the list, except for a pop() operation.\u001b[39;49;00m\n\u001b[1;32m 2273\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdict\u001b[49m\u001b[43m[\u001b[49m\u001b[43mparent_impl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2274\u001b[0m \u001b[43m \u001b[49m\u001b[43mchild\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2275\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 2276\u001b[0m child_impl\u001b[38;5;241m.\u001b[39mpop(\n\u001b[1;32m 2277\u001b[0m child_state,\n\u001b[1;32m 2278\u001b[0m child_dict,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 2281\u001b[0m passive\u001b[38;5;241m=\u001b[39mPASSIVE_NO_FETCH,\n\u001b[1;32m 2282\u001b[0m )\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:704\u001b[0m, in \u001b[0;36mhas_dupes\u001b[0;34m(sequence, target)\u001b[0m\n\u001b[1;32m 699\u001b[0m \u001b[38;5;66;03m# compare to .index version below, this version introduces less function\u001b[39;00m\n\u001b[1;32m 700\u001b[0m \u001b[38;5;66;03m# overhead and is usually the same speed. At 15000 items (way bigger than\u001b[39;00m\n\u001b[1;32m 701\u001b[0m \u001b[38;5;66;03m# a relationship-bound collection in memory usually is) it begins to\u001b[39;00m\n\u001b[1;32m 702\u001b[0m \u001b[38;5;66;03m# fall behind the other version only by microseconds.\u001b[39;00m\n\u001b[1;32m 703\u001b[0m c \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 704\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m sequence:\n\u001b[1;32m 705\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m item \u001b[38;5;129;01mis\u001b[39;00m target:\n\u001b[1;32m 706\u001b[0m c \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "with sqa_profiled():\n", - " ew_deduped.to_cmf()" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "970225fe-7315-4bd7-b9eb-d5aa8a2bc55c", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 175072 function calls (172334 primitive calls) in 0.464 seconds\n", - "\n", - " Ordered by: cumulative time\n", - "\n", - " ncalls tottime percall cumtime percall filename:lineno(function)\n", - " 1 0.000 0.000 0.464 0.464 /home/jovyan/company-matching/cmf/data/results.py:121(to_cmf)\n", - " 1 0.000 0.000 0.455 0.455 /home/jovyan/company-matching/cmf/data/results.py:508(_deduper_to_cmf)\n", - " 1 0.000 0.000 0.454 0.454 /home/jovyan/company-matching/cmf/data/results.py:439(_to_cmf_logic)\n", - " 63/15 0.000 0.000 0.244 0.016 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state_changes.py:95(_go)\n", - " 4 0.000 0.000 0.242 0.061 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1933(commit)\n", - " 6/4 0.000 0.000 0.242 0.061 :1(commit)\n", - " 6/4 0.000 0.000 0.242 0.061 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1249(commit)\n", - " 4 0.000 0.000 0.208 0.052 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2603(commit)\n", - " 4 0.000 0.000 0.208 0.052 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2720(_do_commit)\n", - " 4 0.000 0.000 0.208 0.052 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2695(_connection_commit_impl)\n", - " 4 0.000 0.000 0.208 0.052 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1123(_commit_impl)\n", - " 4 0.000 0.000 0.208 0.052 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:694(do_commit)\n", - " 4 0.208 0.052 0.208 0.052 {method 'commit' of 'psycopg2.extensions.connection' objects}\n", - " 8 0.000 0.000 0.172 0.022 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2247(execute)\n", - " 8 0.000 0.000 0.172 0.021 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2077(_execute_internal)\n", - " 8 0.000 0.000 0.164 0.020 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1372(execute)\n", - " 8 0.000 0.000 0.164 0.020 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:508(_execute_on_connection)\n", - " 8 0.000 0.000 0.164 0.020 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1589(_execute_clauseelement)\n", - " 8 0.000 0.000 0.162 0.020 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1790(_execute_context)\n", - " 8 0.000 0.000 0.144 0.018 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:921(do_execute)\n", - " 8 0.142 0.018 0.144 0.018 {method 'execute' of 'psycopg2.extensions.cursor' objects}\n", - " 2 0.000 0.000 0.131 0.066 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:1214(orm_execute_statement)\n", - " 2 0.001 0.000 0.131 0.066 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:102(_bulk_insert)\n", - " 5 0.001 0.000 0.130 0.026 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:958(_emit_insert_statements)\n", - " 2 0.000 0.000 0.115 0.058 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1994(_exec_insertmany_context)\n", - " 6 0.000 0.000 0.039 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:283(orm_execute_statement)\n", - " 6 0.000 0.000 0.036 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1852(_exec_single_context)\n", - "3203/3202 0.001 0.000 0.031 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1063(get)\n", - "1277/1276 0.001 0.000 0.029 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1108(_fire_loader_callables)\n", - " 1275 0.001 0.000 0.028 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py:867(_load_for_state)\n", - " 6/4 0.000 0.000 0.028 0.007 :1(_prepare_impl)\n", - " 6/4 0.000 0.000 0.028 0.007 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1214(_prepare_impl)\n", - " 10 0.000 0.000 0.028 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4282(flush)\n", - " 13 0.000 0.000 0.028 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:552(__get__)\n", - " 2 0.001 0.000 0.028 0.014 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4331(_flush)\n", - " 1 0.000 0.000 0.027 0.027 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py:994(_emit_lazyload)\n", - " 2 0.000 0.000 0.020 0.010 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:441(execute)\n", - " 1 0.000 0.000 0.016 0.016 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:2671(all)\n", - " 1 0.000 0.000 0.015 0.015 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:1244(clear)\n", - " 637 0.000 0.000 0.015 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:1099(__del)\n", - " 637 0.001 0.000 0.015 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:713(fire_remove_event)\n", - " 2 0.000 0.000 0.014 0.007 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:389(_generate_actions)\n", - " 24 0.000 0.000 0.014 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:518(execute)\n", - " 1274/637 0.002 0.000 0.014 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1795(fire_remove_event)\n", - " 2 0.000 0.000 0.013 0.007 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:2842(_iter)\n", - " 5 0.000 0.000 0.013 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:536(__set__)\n", - " 1 0.000 0.000 0.013 0.013 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1914(set)\n", - " 1 0.000 0.000 0.013 0.013 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:772(bulk_replace)\n", - " 1274 0.001 0.000 0.013 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:1129(append)\n", - " 1274 0.001 0.000 0.012 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:1085(__set)\n", - " 637 0.001 0.000 0.012 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:671(fire_append_event)\n", - " 2 0.000 0.000 0.011 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1775(all)\n", - " 2 0.000 0.000 0.011 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:546(_allrows)\n", - " 1274/637 0.001 0.000 0.011 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2236(emit_backref_from_collection_remove_event)\n", - " 3212 0.003 0.000 0.011 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:245(get_attribute_history)\n", - " 8 0.001 0.000 0.011 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1274(_init_compiled)\n", - " 1274/637 0.002 0.000 0.011 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1747(fire_append_event)\n", - " 12 0.000 0.000 0.011 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:205(chunks)\n", - " 2 0.000 0.000 0.011 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1689(_fetchall_impl)\n", - " 2 0.000 0.000 0.011 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2286(_fetchall_impl)\n", - " 637 0.000 0.000 0.010 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1898(pop)\n", - " 637 0.001 0.000 0.010 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1876(remove)\n", - " 5 0.001 0.000 0.009 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:221()\n", - " 1277 0.003 0.000 0.008 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:1048(_instance)\n", - " 24 0.001 0.000 0.007 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:232(prop_has_changes)\n", - " 1274/637 0.001 0.000 0.007 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2200(emit_backref_from_collection_append_event)\n", - " 637 0.001 0.000 0.006 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1852(append)\n", - " 9 0.001 0.000 0.006 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:466(presort_saves)\n", - " 4 0.001 0.000 0.005 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1082(_remove_snapshot)\n", - " 2 0.000 0.000 0.005 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:468(finalize_flush_changes)\n", - " 2 0.001 0.001 0.005 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3291(_register_persistent)\n", - " 1 0.000 0.000 0.005 0.005 /home/jovyan/company-matching/cmf/data/utils/sha1.py:17(table_name_to_uuid)\n", - " 1 0.000 0.000 0.005 0.005 /home/jovyan/company-matching/cmf/data/results.py:70(_model_to_cmf)\n", - " 2 0.000 0.000 0.004 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:524(load_on_pk_identity)\n", - " 1 0.001 0.001 0.004 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:2268(_process_execute_defaults)\n", - " 1 0.000 0.000 0.004 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/util/_decorators.py:325(wrapper)\n", - " 1 0.000 0.000 0.004 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:2051(to_dict)\n", - " 1 0.000 0.000 0.004 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/methods/to_dict.py:78(to_dict)\n", - " 26 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:184(_log_info)\n", - " 29 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1436(info)\n", - " 645 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:635(_pks_changed)\n", - " 26 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1565(_log)\n", - " 1277 0.003 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:696(_expire)\n", - " 4 0.000 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:758(_deliver_insertmanyvalues_batches)\n", - " 1284 0.001 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1677(get_history)\n", - " 1000 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3599()\n", - " 4 0.002 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5325(_deliver_insertmanyvalues_batches)\n", - " 3 0.000 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:640(execute)\n", - " 645 0.001 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/sync.py:126(source_modified)\n", - " 3 0.000 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:40(save_obj)\n", - " 1000 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/uuid.py:713(uuid4)\n", - " 1639 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:169()\n", - " 1 0.001 0.001 0.003 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/methods/to_dict.py:225()\n", - " 1274 0.002 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:50(append)\n", - " 1274 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2077(get_collection)\n", - " 1642 0.002 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:324(_collect_insert_commands)\n", - " 26 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1591(handle)\n", - " 1 0.000 0.000 0.003 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3843(merge)\n", - " 1 0.000 0.000 0.003 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3930(_merge)\n", - " 26 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1645(callHandlers)\n", - " 1 0.000 0.000 0.003 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3539(get)\n", - " 1 0.000 0.000 0.003 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3716(_get_impl)\n", - " 26 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:939(handle)\n", - " 1274 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:575(_get_pending_mutation)\n", - " 26 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1178(emit)\n", - " 26 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1071(emit)\n", - " 8 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1377()\n", - " 2557 0.002 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:860(_modified_event)\n", - " 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:2720(first)\n", - " 7668 0.002 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:538(dict)\n", - " 1004 0.002 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/uuid.py:138(__init__)\n", - " 1274 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:73(remove)\n", - " 2 0.001 0.001 0.002 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:981(_commit_all_states)\n", - " 1643 0.002 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1851(construct_params)\n", - " 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py:830(_generate_lazy_clause)\n", - " 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3569(_get_state_attr_by_column)\n", - " 1643 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1487()\n", - " 26 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:916(format)\n", - " 26 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:650(format)\n", - " 639 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1242(get_history)\n", - " 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:779(_load_expired)\n", - " 642 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:209(_organize_states_for_save)\n", - " 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:1578(load_scalar_attributes)\n", - " 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:485(load_on_ident)\n", - " 4 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:586(execute)\n", - " 2550 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:340(session)\n", - " 26 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:358(getMessage)\n", - " 1000 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/extras.py:640(getquoted)\n", - " 8 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:642(__repr__)\n", - " 6 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:540(_raw_all_rows)\n", - " 8 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:728(_repr_params)\n", - " 2 0.001 0.000 0.002 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:1122(process_saves)\n", - " 8 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:670(_compile_w_cache)\n", - " 8 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:742(_repr_param_dict)\n", - " 3000 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/methods/to_dict.py:226()\n", - " 637 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:1125(merge_with_history)\n", - " 639 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2402(from_scalar_attribute)\n", - " 637 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:1121(__init__)\n", - " 638 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2386(as_state)\n", - " 3 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1749(__exit__)\n", - " 3 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2429(close)\n", - " 639 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:306(register_object)\n", - " 3 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2531(_close_impl)\n", - " 639 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:504(new_instance)\n", - " 637 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2379(_merge)\n", - " 645 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1722(_connections_for_states)\n", - " 7 0.000 0.000 0.001 0.000 :1(close)\n", - " 1276 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4279(_contains_state)\n", - " 640 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3494(_identity_key_from_state)\n", - " 7 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1345(close)\n", - " 8 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2036(_connection_for_bind)\n", - " 672/669 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1143(__get__)\n", - " 639 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:932(_commit)\n", - " 23 0.000 0.000 0.001 0.000 {method 'join' of 'str' objects}\n", - " 30/20 0.000 0.000 0.001 0.000 :1(_connection_for_bind)\n", - " 1034 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/uuid.py:279(__str__)\n", - " 30/20 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1106(_connection_for_bind)\n", - " 637 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:326(__init__)\n", - " 1637 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5596()\n", - " 6 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:544()\n", - " 637 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:823(unloaded)\n", - " 2000 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:183(maybe_box_native)\n", - " 8 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:715(_get_batches)\n", - " 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2565(close)\n", - " 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2714(_do_close)\n", - " 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2698(_close_impl)\n", - " 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2692(_connection_rollback_impl)\n", - " 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2031(_process_parameters_for_postcompile)\n", - " 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1105(_rollback_impl)\n", - " 1001 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/methods/to_dict.py:222()\n", - " 1277 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:219(get)\n", - " 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:691(do_rollback)\n", - " 6 0.001 0.000 0.001 0.000 {method 'rollback' of 'psycopg2.extensions.connection' objects}\n", - " 1000 0.001 0.000 0.001 0.000 {built-in method posix.urandom}\n", - " 10 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1272(oneshot)\n", - " 2548 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:507(__iter__)\n", - " 96 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/ipkernel.py:770(_clean_thread_parent_frames)\n", - " 640 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:1306(_populate_full)\n", - " 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:311(_compiler)\n", - " 1276 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:148(contains_state)\n", - " 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1748(_sort_states)\n", - " 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1338(__init__)\n", - " 10 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:409(_generate_cache_key)\n", - " 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:555(orm_setup_cursor_result)\n", - " 10 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:345(_generate_cache_key)\n", - " 3 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:726(_emit_update_statements)\n", - " 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:824(__init__)\n", - " 26 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1550(makeRecord)\n", - " 10/2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:911(process)\n", - " 3 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:425(_collect_update_commands)\n", - " 5/2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:350(_compiler_dispatch)\n", - " 15/2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:131(_compiler_dispatch)\n", - " 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5617(visit_insert)\n", - " 3 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1495(_finalize_insert_update_commands)\n", - " 26 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:282(__init__)\n", - " 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:2102(_fetchall_impl)\n", - " 38/10 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:221(_gen_cache_key)\n", - " 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1129(fetchall)\n", - " 2929 0.001 0.000 0.001 0.000 {method 'intersection' of 'set' objects}\n", - " 5 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:787(begin)\n", - " 38 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:331(expect)\n", - " 5 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2667(__init__)\n", - " 5 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2689(_connection_begin_impl)\n", - " 26 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1060(flush)\n", - " 5 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1083(_begin_impl)\n", - " 2552 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:134(__getitem__)\n", - " 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:78(instances)\n", - " 6 0.000 0.000 0.001 0.000 {method 'fetchall' of 'psycopg2.extensions.cursor' objects}\n", - " 640 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:195(__init__)\n", - " 1669 0.000 0.000 0.001 0.000 {method 'update' of 'dict' objects}\n", - " 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3218(_literal_execute_expanding_parameter)\n", - " 3195 0.001 0.000 0.001 0.000 {method 'intersection' of 'frozenset' objects}\n", - " 3277 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:927(process)\n", - " 26 0.001 0.000 0.001 0.000 {method 'flush' of '_io.TextIOWrapper' objects}\n", - " 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/dml.py:70(excluded)\n", - " 1662 0.001 0.000 0.001 0.000 {method 'replace' of 'str' objects}\n", - " 8 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1132(__get__)\n", - " 1637 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1003()\n", - " 639 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2473(_is_orphan)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2482(from_collection)\n", - " 4490 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2346(__bool__)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:834(columns)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:852(c)\n", - " 3629 0.000 0.000 0.000 0.000 {built-in method builtins.isinstance}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2016(set_committed_value)\n", - " 640 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3504()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1636(_populate_column_collection)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:803(_generate_fromclause_column_proxies)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:127()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1703(_populate_separate_keys)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:568(append_multiple_without_event)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1708()\n", - " 1937 0.000 0.000 0.000 0.000 {method 'update' of 'set' objects}\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:806()\n", - " 2594 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:374(__call__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2599(_make_proxy)\n", - " 1278 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:122(__len__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6426(dtypes)\n", - " 2631 0.000 0.000 0.000 0.000 {built-in method __new__ of type object at 0x55d0ef2a7380}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:563()\n", - " 1277 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:714()\n", - " 4528 0.000 0.000 0.000 0.000 {method 'add' of 'set' objects}\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1613(operate)\n", - " 1277 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:942(process)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:450(operate)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:113()\n", - " 307 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:564(trunc)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3374(_register_altered)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:189(operate)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2680(row_processor)\n", - " 34 0.000 0.000 0.000 0.000 {built-in method builtins.sorted}\n", - " 14/13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:333()\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/properties.py:476(operate)\n", - " 637 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:1363(_populate_partial)\n", - " 641 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3361()\n", - " 5233 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}\n", - " 14/4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:583(__eq__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:388(__init__)\n", - " 2671 0.000 0.000 0.000 0.000 {method 'items' of 'dict' objects}\n", - " 1 0.000 0.000 0.000 0.000 /home/jovyan/company-matching/cmf/data/results.py:468()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/crud.py:114(_get_crud_params)\n", - " 638 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3291()\n", - " 11/5 0.000 0.000 0.000 0.000 {built-in method _operator.eq}\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:376(__eq__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2539(expunge_all)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:2421(drop_duplicates)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:746(_only_one_row)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:809(_instance_processor)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:1359(drop_duplicates)\n", - " 637 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:1128(append)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1787(_setup_result_proxy)\n", - " 48 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/ipkernel.py:785()\n", - " 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:461(_detach_states)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/default_comparator.py:51(_boolean_compare)\n", - " 1285 0.000 0.000 0.000 0.000 {method 'issuperset' of 'frozenset' objects}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:565()\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:3246(connect)\n", - " 642 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:607(_elements)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:131(__init__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:1547(itertuples)\n", - " 1329 0.000 0.000 0.000 0.000 {method 'difference' of 'set' objects}\n", - " 153 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:752()\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1510(__init__)\n", - " 639 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:159(replace)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/crud.py:593(_scan_cols)\n", - " 1 0.000 0.000 0.000 0.000 {method 'extend' of 'list' objects}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:1618()\n", - " 40 0.000 0.000 0.000 0.000 {built-in method builtins.next}\n", - "2747/2723 0.000 0.000 0.000 0.000 {built-in method builtins.len}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1177(__getitem__)\n", - " 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:276(_generative)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1210(close)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2273(_fetchone_impl)\n", - " 637 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:1134(remove)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1513(close)\n", - " 307 0.000 0.000 0.000 0.000 {built-in method builtins.repr}\n", - " 1274 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:806(key)\n", - " 1290 0.000 0.000 0.000 0.000 {method 'difference_update' of 'set' objects}\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1390(_checkin)\n", - " 153 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:759()\n", - " 1639 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:191()\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:919(_finalize_fairy)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1690(_getitem_tuple)\n", - " 637 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3246(_render_bindtemplate)\n", - " 640 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:509(_cleanup)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1684(_fetchone_impl)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1419(__init__)\n", - " 645 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:382(states_for_mapper_hierarchy)\n", - " 639 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:207(_add_unpresent)\n", - " 877 0.000 0.000 0.000 0.000 {method 'get' of 'dict' objects}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:417(_deep_annotate)\n", - " 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2054(__init__)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1514(findCaller)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3287()\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:3271(raw_connection)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:219(_init_items)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1816(one)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1333(_set_parent_with_dispatch)\n", - " 639 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:428(__setitem__)\n", - " 6/2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:440(clone)\n", - " 638 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2349(empty)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:444(connect)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:1082(process_deletes)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:539(_get_embedded_bindparams)\n", - " 48 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1388(enumerate)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:1085(__getitem__)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1261(_checkout)\n", - " 12 0.000 0.000 0.000 0.000 :1(connection)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/crud.py:819(_append_param_parameter)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:677(execute)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2499()\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:166(delete_obj)\n", - " 638 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2392()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:1975(filter_by)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1033(_getitem_lowerdim)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/crud.py:411(_create_bind_param)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3210(_set_parent)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:1364(_duplicated)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:711(checkout)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2503()\n", - " 1000 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/extras.py:633(__init__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2432(_on_table_attach)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/api.py:41(listen)\n", - " 432 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1109(ident)\n", - " 19 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:1053(create_row_processor)\n", - " 1278 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3373(iterate_to_root)\n", - " 1277 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:620(__bool__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:807(_literal_coercion)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/algorithms.py:994(duplicated)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:1990(visit_on_conflict_do_update)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4562(_bind_param)\n", - " 1000 0.000 0.000 0.000 0.000 {built-in method from_bytes}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:2003()\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:125(_annotate)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:1949(_on_conflict_target)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:464(orm_pre_session_exec)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:301(_organize_states_for_delete)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:346(_per_mapper_flush_actions)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:936(traverse)\n", - " 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:881(__init__)\n", - " 4/2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:219(_copy_internals)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:753(checkin)\n", - " 1004 0.000 0.000 0.000 0.000 {method 'count' of 'list' objects}\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:274(_as_annotated_instance)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:1963()\n", - " 640 0.000 0.000 0.000 0.000 :1(set)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:882(traverse_using)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1720(_getitem_axis)\n", - " 639 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3542(key)\n", - " 56 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:495()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:1146(orm_pre_session_exec)\n", - " 1371 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/dml.py:37(insert)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:985(connection)\n", - " 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1813(_autobegin_t)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2892(query)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2931(_construct_for_op)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_selectable_constructors.py:448(select)\n", - " 638 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2151()\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1750()\n", - " 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:440(__get__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:1206(_get_rows_with_mask)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:236(__init__)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:823(iterate)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/posixpath.py:117(splitext)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:1210(__init__)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2352(_soft_close)\n", - " 144 0.000 0.000 0.000 0.000 {built-in method builtins.hasattr}\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1707(create_cursor)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5129(__init__)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:405(_safe_annotate)\n", - " 642 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:109()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:282(_set_entities)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:530(get)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:984(__init__)\n", - " 1292 0.000 0.000 0.000 0.000 {built-in method builtins.id}\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/posixpath.py:140(basename)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:280(listen)\n", - " 4/1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:847(in_)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:454(_return_conn)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3577(visit_bindparam)\n", - " 639 0.000 0.000 0.000 0.000 {method 'clear' of 'dict' objects}\n", - " 3/1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2119(in_op)\n", - " 1002 0.000 0.000 0.000 0.000 {method 'encode' of 'str' objects}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1854(_setup_dml_or_text_result)\n", - " 4/2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:663(create_for_statement)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:3971(_ixs)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:255(visit_clauseelement)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/impl.py:134(_do_return_conn)\n", - " 142/139 0.000 0.000 0.000 0.000 {built-in method builtins.getattr}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5136()\n", - " 1 0.000 0.000 0.000 0.000 :1(__init__)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3945(__init__)\n", - " 1 0.000 0.000 0.000 0.000 /home/jovyan/company-matching/cmf/data/results.py:484()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1544(scalar)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1042()\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1728(create_default_cursor)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:147(__init__)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:634(formatMessage)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1444(__init__)\n", - " 19 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:995(_get_context_loader)\n", - " 638 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2388()\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:510(_validate_dtype)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:1303(create_for_statement)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:133(put)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1791(first)\n", - " 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:183(_for_instance)\n", - " 639 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:438(_pending_mutations)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2619()\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:299(generate)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/default_comparator.py:212(_in_impl)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1581(pandas_dtype)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:1199(_run_crud)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:265(__init__)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/impl.py:143(_do_get)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:465(__getattr__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:177(_listen)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:554(_statement_20)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6278(__getattr__)\n", - " 639 0.000 0.000 0.000 0.000 {method 'discard' of 'set' objects}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:288()\n", - " 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:493(_mappers)\n", - " 47 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1689(isEnabledFor)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:557(_initialize_instance)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2770(__init__)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1306(__getattr__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1953(get_rows_with_mask)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:363(_listen)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:982(_gen_cache_key_inst)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5393(safe_construct)\n", - " 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/uuid.py:273(__repr__)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1489(cursor)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:333(base_listen)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5256(__init__)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:261(helper)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:469(keys)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:80(per_property_flush_actions)\n", - " 638 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2396()\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:432(format)\n", - " 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1529(_soft_close)\n", - " 714 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:1375(cast)\n", - " 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:291(__init__)\n", - " 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:180(_for_class)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2253(_soft_close)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1257(__get__)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:628(usesTime)\n", - " 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/encodings/utf_8.py:15(decode)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:517(sanitize_array)\n", - " 46 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:159(__getattr__)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/topological.py:58(sort)\n", - " 638 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:1170()\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:175(get)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/inspection.py:118(inspect)\n", - " 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1028(_take_snapshot)\n", - " 1 0.000 0.000 0.000 0.000 :1(on_conflict_do_update)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1261(set)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/genericpath.py:121(_splitext)\n", - " 1 0.000 0.000 0.000 0.000 :1(limit)\n", - " 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:259(all_states)\n", - " 52 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:896(acquire)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2513()\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:203(sub)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:460(get_children)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/topological.py:77(find_cycles)\n", - " 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/log.py:101(_should_log_debug)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/extras.py:669()\n", - " 8 0.000 0.000 0.000 0.000 {method 'cursor' of 'psycopg2.extensions.connection' objects}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:288(get_dtypes)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/decl_base.py:2129(_declarative_constructor)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:302(check)\n", - " 58 0.000 0.000 0.000 0.000 {built-in method builtins.setattr}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:1982(visit_on_conflict_do_nothing)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2625()\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:519(run_generated_dispatch)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:789(__add__)\n", - " 1 0.000 0.000 0.000 0.000 :1(select_from)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3851(bindparam_string)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:97(is_bool_indexer)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/topological.py:30(sort_as_subsets)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3172(_resolve_column)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1404(_reset)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:958(_validate_tuple_indexer)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2237(_gen_cache_key)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:2600(limit)\n", - " 2 0.000 0.000 0.000 0.000 :1(filter)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/crud.py:1068(_append_param_insert_hasdefault)\n", - " 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:137(__init__)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:86(__init__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4596(_box_col_values)\n", - " 2 0.000 0.000 0.000 0.000 :1(_begin)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5369(__getitem__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:1347(_setup_for_bulk_insert)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1841(_initialize_collection)\n", - " 80 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:389(__bool__)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1562(_get_cache_stats)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:1417(_offset_or_limit_clause)\n", - " 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6295(__setattr__)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:429(_format)\n", - " 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:123(__exit__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:541(_post_coercion)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2512()\n", - " 68 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:312()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2514()\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1862(from_array)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/decl_api.py:1885(_inspect_decl_meta)\n", - " 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:795(_getitem)\n", - " 26 0.000 0.000 0.000 0.000 {method 'write' of '_io.TextIOWrapper' objects}\n", - " 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3001(_autoflush)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/crud.py:258()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/dml.py:107(on_conflict_do_update)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:160()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/api.py:28(_event_key)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:591(append)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/crud.py:1230(_create_insert_prefetch_bind_param)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:480()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6013(select_from)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:766()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/dml.py:272(__init__)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:114(__enter__)\n", - " 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:94(_gen_annotations_cache_key)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/properties.py:328(merge)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:447(expect_as_key)\n", - " 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:740(_generate)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:421(usesTime)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_selectable_constructors.py:61(alias)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:358(append_to_list)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1087(_literal_coercion)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2165()\n", - " 52 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:903(release)\n", - " 29 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:415(__getitem__)\n", - " 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4324(_is_clean)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1503(effective_returning)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:522(_inspect_mapped_class)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6049()\n", - " 1 0.000 0.000 0.000 0.000 /home/jovyan/company-matching/cmf/data/utils/sha1.py:79(list_to_value_ordered_sha1)\n", - " 26 0.000 0.000 0.000 0.000 {built-in method posix.getpid}\n", - " 38 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/generic.py:42(_instancecheck)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1575(_validate_key)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6230(__finalize__)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:913(from_execution_options)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2323(__init__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1708(_factory)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1779(_bind_processors)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/posixpath.py:52(normcase)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:359(_clone)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:997(_begin)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5433(_can_hold_identifiers_and_holds_name)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_weakrefset.py:27(__exit__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:120(_stored_in_collection)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:278(__init__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:419(to_list)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1372(null_dml_result)\n", - " 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1432(_next)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py:567(post_exec)\n", - " 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1041(_is_autocommit_isolation)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4050(__getitem__)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:836(__iter__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:145(_get_option)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:345(__missing__)\n", - " 78 0.000 0.000 0.000 0.000 {method 'rfind' of 'str' objects}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:2625(check_bool_indexer)\n", - " 23 0.000 0.000 0.000 0.000 {built-in method _codecs.utf_8_decode}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:165(__setitem__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1674(__copy)\n", - " 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:311(__iter__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:477(initialize_collection)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:71(per_property_preprocessors)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1358(current_thread)\n", - " 5 0.000 0.000 0.000 0.000 {method 'sub' of 're.Pattern' objects}\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:119(getLevelName)\n", - " 128 0.000 0.000 0.000 0.000 {method 'values' of 'dict' objects}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1787()\n", - " 1 0.000 0.000 0.000 0.000 :1(where)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2535(visit_column)\n", - " 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:557(__new__)\n", - " 195 0.000 0.000 0.000 0.000 {method 'keys' of 'dict' objects}\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3815(_resolve_value_to_type)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/posixpath.py:41(_get_sep)\n", - " 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7414(quote)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:332(for_modify)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1013(iget)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:885(_post_coercion)\n", - " 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:774(__hash__)\n", - " 3 0.000 0.000 0.000 0.000 {built-in method numpy.array}\n", - " 3 0.000 0.000 0.000 0.000 :1(unique)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:714(_get_plugin_class_for_plugin)\n", - " 52 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:791(filter)\n", - " 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:368(__init__)\n", - " 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:486()\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1789()\n", - " 1 0.000 0.000 0.000 0.000 :1(on_conflict_do_nothing)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1675()\n", - " 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:208(_effective_processors)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:501(_deep_deannotate)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:663(_constructor_from_mgr)\n", - " 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1544(self_group)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:308(_get_reversed_processed_set)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/typing.py:300(is_non_string_iterable)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:339(_from_mgr)\n", - " 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:860(dialect_impl)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7603(format_column)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:137(is_object_dtype)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:88(_annotations_cache_key)\n", - " 1 0.000 0.000 0.000 0.000 /home/jovyan/company-matching/cmf/data/utils/sha1.py:89()\n", - " 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state_changes.py:175(_expect_state)\n", - " 19 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py:1152(create_row_processor)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:336(_accept_with)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:187(_join)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:408(_clone)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1037(coerce_compared_value)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_collections_abc.py:925(clear)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:301(register_preprocessor)\n", - " 17 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:417(_gen_cache_key)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:806(_set_axis)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:934(_init_collections)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:728(alias)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5322(__new__)\n", - " 38 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/generic.py:37(_check)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1482(_init_metadata)\n", - " 56 0.000 0.000 0.000 0.000 {method 'acquire' of '_thread.RLock' objects}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/algorithms.py:106(_ensure_data)\n", - " 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:826(_values)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:185(_make_key_to_index)\n", - " 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:351(notify)\n", - " 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:256(__enter__)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:371()\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:418(__len__)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:2758(check_dict_or_set_indexers)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2220(_clone)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/dml.py:306()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:348(__add__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:165(simplefilter)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2198(_safe_close_cursor)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1894()\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5116(_create_raw_select)\n", - " 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5183(__new__)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1145(scalars)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7570(format_table)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5244(__get__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:784(values)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:252(create_for_statement)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:301(_annotate)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1603(_construct)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:127(_get_single_key)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:672(_constructor_sliced_from_mgr)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_elements_constructors.py:461(bindparam)\n", - " 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:4056(_memo)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1668(_validate_integer)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:289(_compile)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py:570(_log_notices)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:7687(maybe_extract_name)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:539(__init__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:808(_all_selected_columns)\n", - " 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2638(get_bind)\n", - " 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1493(__getattr__)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:388(_commit_removals)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:954()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:508(clone)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2174(_entity_namespace_key)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:628(__init__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:106(remove)\n", - " 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:909(__len__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3056(_resolve_col_tokens)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1183(is_bool_dtype)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1093(name)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:1885(filter)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:164(__init__)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5442()\n", - " 58 0.000 0.000 0.000 0.000 {method '__exit__' of '_thread.RLock' objects}\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1589(__iter__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:779(name)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1999(external_values)\n", - " 8 0.000 0.000 0.000 0.000 {built-in method builtins.any}\n", - " 78 0.000 0.000 0.000 0.000 {built-in method posix.fspath}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1652(_soft_close)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:821(get_connection)\n", - " 68 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:313()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5319(__contains__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:1036(shape)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1419(_is_dtype_type)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexers/utils.py:419(check_array_indexer)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2709(new_block)\n", - " 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1590(executemany)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:288(__new__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:1703(render_bind_cast)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:387(standardize_mapping)\n", - " 28 0.000 0.000 0.000 0.000 {method 'copy' of 'dict' objects}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5958(where)\n", - " 26 0.000 0.000 0.000 0.000 {method 'find' of 'str' objects}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1396(_emit_delete_statements)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:183()\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/log.py:104(_should_log_info)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/multiprocessing/process.py:189(name)\n", - " 26 0.000 0.000 0.000 0.000 {built-in method sys._getframe}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:763(_try_cast)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:181(_add_filter)\n", - " 47 0.000 0.000 0.000 0.000 {built-in method builtins.hash}\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:347(opt_manager_of_class)\n", - " 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:373(__hash__)\n", - " 7 0.000 0.000 0.000 0.000 {built-in method builtins.all}\n", - " 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:112(check_modified)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1229(_set_memoized_attribute)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:878(per_property_preprocessors)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2659(_get_entity_clauses)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:306(_with_annotations)\n", - " 1 0.000 0.000 0.000 0.000 {built-in method _functools.reduce}\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:496(popitem)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:347(_expand_composites)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1653(_is_scalar_access)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/properties.py:469(_fallback_getattr)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:873(_unwrapped_dialect_impl)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:43(__init__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2155(_entity_namespace)\n", - " 2 0.000 0.000 0.000 0.000 :1(_generated_get_children_traversal)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:530(_new_state_if_none)\n", - " 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:259(__exit__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/crud.py:470(_key_getters_for_crud_column)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:229(_put)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1587(__get__)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5428(apply_placeholders)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:489(__init__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3840(set_label_style)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:833(__init__)\n", - " 52 0.000 0.000 0.000 0.000 {built-in method _thread.get_ident}\n", - " 31 0.000 0.000 0.000 0.000 {method 'pop' of 'list' objects}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1615(_init)\n", - " 26 0.000 0.000 0.000 0.000 {built-in method time.time}\n", - " 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2003(internal_values)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:669(_sliced_from_mgr)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:237(set_axis)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:493(__call__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1995(_dispose_previous_collection)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/abc.py:117(__instancecheck__)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/relationships.py:1367(merge)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:79(_is_literal)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:185(__iter__)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:573(__init__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:348(__new__)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1720(__init__)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:445(_row_getter)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:458(__enter__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/methods/to_dict.py:155()\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:172(get)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:349(_red)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:715(visit_has_cache_key_list)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2638(maybe_coerce_values)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:226(_full)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:836(__add__)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:903(_cached_bind_processor)\n", - " 2 0.000 0.000 0.000 0.000 {built-in method _hashlib.openssl_sha1}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/crud.py:1515(_get_returning_modifiers)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:975(_is_nested_tuple_indexer)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:248(_select_iterables)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:708(_set_get_options)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:232(_get)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:585(_get_axis)\n", - " 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:99()\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4857(__init__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1556(validate_all_hashable)\n", - " 50 0.000 0.000 0.000 0.000 {built-in method builtins.iter}\n", - " 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/inference.py:334(is_hashable)\n", - " 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:231(memo)\n", - " 56 0.000 0.000 0.000 0.000 {method 'release' of '_thread.RLock' objects}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:195(_state_session)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:131(coerce_to_immutabledict)\n", - " 6 0.000 0.000 0.000 0.000 :1()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:649(_simple_new)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:234(__init__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3096(_link_to_col_by_colstring)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2481(is_boolean)\n", - " 3 0.000 0.000 0.000 0.000 {method 'take' of 'numpy.ndarray' objects}\n", - " 16 0.000 0.000 0.000 0.000 {built-in method builtins.issubclass}\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:2295(to_instance)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1221(_reset_memoizations)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:249(external_values)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1532(__init__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:652(_getitem)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1339(_post_coercion)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4614(_get_item_cache)\n", - " 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:635(__init__)\n", - " 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/multiprocessing/process.py:37(current_process)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:512(__init__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:729(name)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:279()\n", - " 4 0.000 0.000 0.000 0.000 :1(_generated_cache_key_traversal)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:566(require_length_match)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:727()\n", - " 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/util.py:105(_trans_ctx_check)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:218(_acquireLock)\n", - " 10 0.000 0.000 0.000 0.000 {method 'union' of 'set' objects}\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_weakrefset.py:21(__enter__)\n", - " 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/__init__.py:34(using_copy_on_write)\n", - " 1 0.000 0.000 0.000 0.000 /home/jovyan/company-matching/cmf/data/utils/db.py:17(get_schema_table_names)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:4380(_event_on_init)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/base.py:86(_validate_set_axis)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:395(__init__)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1231(__init__)\n", - " 8 0.000 0.000 0.000 0.000 {method 'close' of 'psycopg2.extensions.cursor' objects}\n", - " 13 0.000 0.000 0.000 0.000 {method 'get' of 'mappingproxy' objects}\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:857(_unique_strategy)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:693(_sanitize_ndim)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:155(_deannotate)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:1546(for_context)\n", - " 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:981(_is_transaction_boundary)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:188()\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:666(_info_axis)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:389(object_mapper)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_weakrefset.py:17(__init__)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:292()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:340(dispatch_is)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3814(_truncate_bindparam)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/flags.py:51(__init__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:360(_mapper_for_dep)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:1461(_set_as_cached)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:604(__str__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:635(_get_root)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1182()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:984(per_property_dependencies)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2914(_identity_lookup)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5517(__contains__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2171(process_expanding)\n", - " 1 0.000 0.000 0.000 0.000 /home/jovyan/company-matching/cmf/data/utils/db.py:162(sqa_profiled)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:181(__len__)\n", - " 2 0.000 0.000 0.000 0.000 /home/jovyan/company-matching/cmf/data/utils/sha1.py:67(prep_for_hash)\n", - " 2 0.000 0.000 0.000 0.000 :398(parent)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2231(__init__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:951(process)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:611(__iter__)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2677(_deactivate_from_connection)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:269(__init__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:676(_translate_key)\n", - " 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:559(connection)\n", - " 3 0.000 0.000 0.000 0.000 {built-in method _abc._abc_instancecheck}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:382(entity_namespace)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:353(__init__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2820(external_values)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:936(_expand_ellipsis)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:2731(is_label_like)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:518(_inc_counter)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/dml.py:218(__init__)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:556(_implicit_coercions)\n", - " 10 0.000 0.000 0.000 0.000 {method '__enter__' of '_thread.RLock' objects}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2667(get_block_type)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:416(extract_array)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:223(_resolve_for_literal)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:430(has_intersection)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1675(getEffectiveLevel)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1439(duck_type_collection)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:248(is_mapped)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:549(find)\n", - " 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1084(_effective_plugin_target)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:798(tolist)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/IPython/core/displayhook.py:258(__call__)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3414(primary_base_mapper)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:257(_adjust_fn_spec)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:227(_releaseLock)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:815(_post_coercion)\n", - " 28 0.000 0.000 0.000 0.000 {built-in method builtins.callable}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:437(__init__)\n", - " 12 0.000 0.000 0.000 0.000 {built-in method from_iterable}\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5281(type)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4826(_dirty_states)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:792()\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1836(__init__)\n", - " 10 0.000 0.000 0.000 0.000 {built-in method time.perf_counter}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1748(set_creation_order)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:481(ensure_wrapped_if_datetimelike)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1064(soft_close)\n", - " 23 0.000 0.000 0.000 0.000 {method 'isdisjoint' of 'set' objects}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2238(_extra_kwargs)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1041(unique)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:289()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1788(as_readonly)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1016(_autoincrement_column)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:682(__init__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/dml.py:173(on_conflict_do_nothing)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/abc.py:121(__subclasscheck__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4886(_clone)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:909(__len__)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2500(is_associative)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/topological.py:54()\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:370(apply_if_callable)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:1048(presort_saves)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:607(compare_values)\n", - " 16 0.000 0.000 0.000 0.000 {method 'startswith' of 'str' objects}\n", - " 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:106()\n", - " 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:672(__init__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2378(_check_configure)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/flags.py:87(allows_duplicate_labels)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1575()\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:477(__exit__)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1608(engine)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1591()\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:173(_get_table_key)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2458(is_comparison)\n", - " 2 0.000 0.000 0.000 0.000 {method 'digest' of '_hashlib.HASH' objects}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:524(_still_open_and_dbapi_connection_is_valid)\n", - " 3 0.000 0.000 0.000 0.000 {built-in method _weakref._remove_dead_weakref}\n", - " 4 0.000 0.000 0.000 0.000 {method 'search' of 're.Pattern' objects}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1600(__getattr__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:271(inner)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:631(visit_with_context_options)\n", - " 1 0.000 0.000 0.000 0.000 /home/jovyan/company-matching/cmf/data/results.py:66(_get_results_type)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:7592(ensure_index)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:48(_kill)\n", - " 4 0.000 0.000 0.000 0.000 {method 'popitem' of 'dict' objects}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:495(get_impl)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5447()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1174(key)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:467(_cloned_set)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:123()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2521(iterate_properties)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/methods/to_dict.py:160()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:808(unmodified)\n", - " 3 0.000 0.000 0.000 0.000 {method 'remove' of 'list' objects}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7375(quote_schema)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:585()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:420(__init__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:182(__len__)\n", - " 16 0.000 0.000 0.000 0.000 {method 'remove' of 'set' objects}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:134()\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:571(_get_axis_number)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:181(blknos)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2947(_non_hashable_value)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1609(__getitem__)\n", - " 8 0.000 0.000 0.000 0.000 {method 'strip' of 'str' objects}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2111(__init__)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1303(_fallback_getattr)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:223()\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:617(_select_options)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1660(get)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:572(get_impl)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3764(__init__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:368(_resolve_for_literal)\n", - " 1 0.000 0.000 0.000 0.000 {function _list_decorators..clear..clear at 0x7f6cb2910b80}\n", - " 4 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:732(_sanitize_str_dtypes)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:223(_empty)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:752(_maybe_repeat)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2227(_gen_static_annotations_cache_key)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:309(is_null_slice)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:439(_no_limit_offset)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:447(get_from_identity)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:760(_generate)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:313()\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:185()\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:982()\n", - " 11 0.000 0.000 0.000 0.000 {method 'append' of 'collections.deque' objects}\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1711()\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1275(memo)\n", - " 10 0.000 0.000 0.000 0.000 {method '_is_owned' of '_thread.RLock' objects}\n", - " 9 0.000 0.000 0.000 0.000 {method '__exit__' of '_thread.lock' objects}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:161(iloc)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:2765()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:446(mapper)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:437()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/IPython/core/displayhook.py:70(check_for_underscore)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:993(_validate_key_length)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:2276()\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1384(_get_dtype)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/base.py:74(__len__)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1666()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:1253(iget)\n", - " 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:885(mapper)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:408(object_state)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1563(keys)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1030(in_transaction)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:252(_key)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1167(_post_coercion)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/inference.py:195(is_array_like)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:1181()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:245(keys)\n", - " 11 0.000 0.000 0.000 0.000 {method 'popleft' of 'collections.deque' objects}\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1734(pre_exec)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexers/utils.py:62(is_list_like_indexer)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:80(_memoized_attr_ref)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:427(_no_statement_condition)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:404(flags)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:152(cast_scalar_indexer)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2955(row_processor)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:176(_message_formatter)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:209(has_work)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1377(__init__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:455(__contains__)\n", - " 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:446()\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:354(_listen_fn)\n", - " 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:872(_gen_cache_key)\n", - " 3 0.000 0.000 0.000 0.000 {method 'astype' of 'numpy.ndarray' objects}\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:2774()\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:649(_get_deprecated_option)\n", - " 1 0.000 0.000 0.000 0.000 {built-in method _abc._abc_subclasscheck}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:342(__init__)\n", - " 4 0.000 0.000 0.000 0.000 {built-in method numpy.asarray}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3982(__bool__)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1939(_block)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:341()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:262(_fast_discard)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:376(__init__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1732(unique)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:1019(axes)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/inspect.py:73(isclass)\n", - " 2 0.000 0.000 0.000 0.000 {built-in method builtins.min}\n", - " 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1324(memo)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/_typing.py:132(is_composite_class)\n", - " 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:688(do_begin)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5736()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1009(_iterate_self_and_parents)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:639()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:156(_adjust_fn_spec)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:256(with_wrapper)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:362(attrs)\n", - " 9 0.000 0.000 0.000 0.000 {built-in method _warnings._filters_mutated}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/flags.py:55(allows_duplicate_labels)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:831(_reset_identity)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:232(_propagate_attrs)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:942()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/util/_validators.py:226(validate_bool_kwarg)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:438()\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1276(disable)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:688(_collect_delete_commands)\n", - " 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:562(_literal_coercion)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1713()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1366(asint)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:202(_copy_internals)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:380(__clause_element__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:370(remove)\n", - " 2 0.000 0.000 0.000 0.000 {method 'rpartition' of 'str' objects}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:121(classes)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/uuid.py:239(__eq__)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:224()\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:394(visit_clauseelement)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:502(_setup_orm_returning)\n", - " 2 0.000 0.000 0.000 0.000 {method 'group' of 're.Match' objects}\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:455(_constructor)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:1933(_filter_by_zero)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:197(blklocs)\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5140(_values)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5452()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5872()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:695(ndim)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/dml.py:810()\n", - " 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:994(hard_close)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1715()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1677(_attributes)\n", - " 1 0.000 0.000 0.000 0.000 {method 'issubset' of 'frozenset' objects}\n", - " 3 0.000 0.000 0.000 0.000 {method 'insert' of 'list' objects}\n", - " 2 0.000 0.000 0.000 0.000 :1(_generated_copy_internals_traversal)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:432(_inspect_mapped_object)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:991(soft_close)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:623(_fire_append_wo_mutation_event_bulk)\n", - " 1 0.000 0.000 0.000 0.000 {method 'tolist' of 'numpy.ndarray' objects}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:256()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3461(identity_key_from_primary_key)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1451(is_valid)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:529(bulk_appender)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:448(visit_dml_multi_values)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:109(_dirty_states)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:590(_validate_dialect_kwargs)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2951(_null_column_type)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1988(index)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3139(entity_namespace)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:105()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:986(_gen_cache_key)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:379(__enter__)\n", - " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:136(__contains__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:168(_instance_dict)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:1470(_clear_item_cache)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:663(_copy_callables)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1746(__enter__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/bulk_persistence.py:160()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:400(visit_clauseelement_tuple)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/__init__.py:42(warn_copy_on_write)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1566()\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:659(_constructor)\n", - " 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1295(_post_coercion)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:451(visit_propagate_attrs)\n", - " 3 0.000 0.000 0.000 0.000 {method 'isascii' of 'str' objects}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:382(__exit__)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:397(visit_clauseelement_list)\n", - " 1 0.000 0.000 0.000 0.000 {method 'clear' of 'set' objects}\n", - " 1 0.000 0.000 0.000 0.000 {method 'lower' of 'str' objects}\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:406(visit_fromclause_canonical_column_collection)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1519()\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:363(ndim)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:2622(visit_UUID)\n", - " 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:199(_clone)\n", - " 1 0.000 0.000 0.000 0.000 {method 'update' of '_hashlib.HASH' objects}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2239(_within_exec_param_key_getter)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:247(items)\n", - " 1 0.000 0.000 0.000 0.000 {method 'bit_length' of 'int' objects}\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:442(_empty_collections)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:706(_resolve_for_literal)\n", - " 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:693(_fire_remove_event_bulk)\n", - "\n", - "\n", - "\n" - ] - } - ], - "source": [ - "with sqa_profiled():\n", - " ew_clusters.to_cmf()" - ] - }, - { - "cell_type": "markdown", - "id": "3d0e98de-fb60-424c-9bf0-152835d947e4", - "metadata": {}, - "source": [ - "## Second model\n", - "\n", - "Brand new data." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "80416564-5b7a-4faa-9131-bce0f7791965", - "metadata": {}, - "outputs": [], - "source": [ - "_NAME = \"naive_export_wins_v2\"\n", - "_SOURCE = \"dbt.export_wins__wins_dataset\"" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "9bb972b4-a4f7-45b0-88e3-6b2a181c9b64", - "metadata": {}, - "outputs": [], - "source": [ - "ew_selector = selector(\n", - " table=_SOURCE,\n", - " fields=[\"company_name\", \"cdms_reference\"],\n", - ")\n", - "\n", - "ew_raw = cmf.query(selector=ew_selector, return_type=\"pandas\", limit=1_000)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "764ebc1e-3bf5-4720-b881-aeb44472cd08", - "metadata": {}, - "outputs": [], - "source": [ - "clean_generic_id = clean.cleaning_function(\n", - " steps.punctuation_to_spaces, steps.to_upper, steps.remove_whitespace\n", - ")\n", - "\n", - "clean_ew = cleaners(\n", - " cleaner(\n", - " clean.company_name, {\"column\": \"dbt_export_wins__wins_dataset_company_name\"}\n", - " ),\n", - " cleaner(\n", - " clean_generic_id, {\"column\": \"dbt_export_wins__wins_dataset_cdms_reference\"}\n", - " ),\n", - ")\n", - "\n", - "ew_clean = cmf.process(ew_raw, clean_ew)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "15d1ca67-f1b8-401d-8563-0315c8eeb648", - "metadata": {}, - "outputs": [], - "source": [ - "ew_naive_deduper = cmf.make_deduper(\n", - " dedupe_run_name=_NAME,\n", - " description=\"Basic cleaning of name and CDMS column.\",\n", - " deduper=NaiveDeduper,\n", - " deduper_settings={\n", - " \"id\": \"data_sha1\",\n", - " \"unique_fields\": [\n", - " \"dbt_export_wins__wins_dataset_company_name\",\n", - " \"dbt_export_wins__wins_dataset_cdms_reference\",\n", - " ],\n", - " },\n", - " data=ew_clean,\n", - " data_source=_SOURCE,\n", - ")\n", - "\n", - "ew_deduped = ew_naive_deduper()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "01222a89-c41c-462d-b65c-31465f2a235e", - "metadata": {}, - "outputs": [], - "source": [ - "ew_clusters = cmf.to_clusters(\n", - " ew_clean, \n", - " results=ew_deduped, \n", - " key=\"data_sha1\", \n", - " threshold=1\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "69d9cef5-8191-4e69-bb3d-5ffc93582494", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parentchild
0b'r\\x15\\xa1\\xb5G\\x8f)\\xce\\xc4\\x90\\x99\\xcb\\x98i...b'\\x1d\\x80\\xb3\\xbd\\x8ar\\xf57QE\\xc6\\x9a}\\xd0\\xc...
1b'r\\x15\\xa1\\xb5G\\x8f)\\xce\\xc4\\x90\\x99\\xcb\\x98i...b'\\xa7\\x04:\\xc1\\xbd\\xf74h\\\\\\x93G>)\\x81\\xd69\\xe...
2b' ^\\x9f\\xe9~+*l\\xc2\\xe2C\\x1c\\xdb!ENx\\xb5\\xb8\\...b'+\\xf6\\xca\\x88\\xac\\x83JQ\\x8c\\xb85\\x837\\x13\\x0...
\n", - "
" - ], - "text/plain": [ - " parent \\\n", - "0 b'r\\x15\\xa1\\xb5G\\x8f)\\xce\\xc4\\x90\\x99\\xcb\\x98i... \n", - "1 b'r\\x15\\xa1\\xb5G\\x8f)\\xce\\xc4\\x90\\x99\\xcb\\x98i... \n", - "2 b' ^\\x9f\\xe9~+*l\\xc2\\xe2C\\x1c\\xdb!ENx\\xb5\\xb8\\... \n", - "\n", - " child \n", - "0 b'\\x1d\\x80\\xb3\\xbd\\x8ar\\xf57QE\\xc6\\x9a}\\xd0\\xc... \n", - "1 b'\\xa7\\x04:\\xc1\\xbd\\xf74h\\\\\\x93G>)\\x81\\xd69\\xe... \n", - "2 b'+\\xf6\\xca\\x88\\xac\\x83JQ\\x8c\\xb85\\x837\\x13\\x0... " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Index: 1000 entries, 0 to 495\n", - "Data columns (total 2 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 parent 1000 non-null object\n", - " 1 child 1000 non-null object\n", - "dtypes: object(2)\n", - "memory usage: 23.4+ KB\n" - ] - } - ], - "source": [ - "ew_clusters.to_df().head(3)\n", - "ew_clusters.to_df().info()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "9304616c-7210-4f6d-a1fa-367728ee789c", - "metadata": {}, - "outputs": [ - { - "ename": "AssertionError", - "evalue": "Dependency rule on column 'cmf__ddupes.sha1' tried to blank-out primary key column 'cmf__ddupe_probabilities.ddupe' on instance ''", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[14], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mew_deduped\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_cmf\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/company-matching/cmf/data/results.py:139\u001b[0m, in \u001b[0;36mResultsBaseDataclass.to_cmf\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 138\u001b[0m logic_logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmetadata\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] Writing deduplication data\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 139\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_deduper_to_cmf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 141\u001b[0m \u001b[38;5;66;03m# Linker\u001b[39;00m\n\u001b[1;32m 142\u001b[0m \u001b[38;5;66;03m# Write model\u001b[39;00m\n\u001b[1;32m 143\u001b[0m logic_logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmetadata\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] Registering model\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/company-matching/cmf/data/results.py:347\u001b[0m, in \u001b[0;36mProbabilityResults._deduper_to_cmf\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 341\u001b[0m \u001b[38;5;66;03m# proposes_dedupes_dict[dd] = r[\"probability\"] \u001b[39;00m\n\u001b[1;32m 342\u001b[0m \n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# model.proposes_dedupes = proposes_dedupes_dict\u001b[39;00m\n\u001b[1;32m 345\u001b[0m logic_logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[TEST] inserted nodes \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mlen\u001b[39m(model\u001b[38;5;241m.\u001b[39mproposes_dedupes))\n\u001b[0;32m--> 347\u001b[0m \u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcommit\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 349\u001b[0m logic_logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[TEST] commited\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1969\u001b[0m, in \u001b[0;36mSession.commit\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1966\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m trans \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1967\u001b[0m trans \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_autobegin_t()\n\u001b[0;32m-> 1969\u001b[0m \u001b[43mtrans\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcommit\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_to_root\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m:2\u001b[0m, in \u001b[0;36mcommit\u001b[0;34m(self, _to_root)\u001b[0m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state_changes.py:139\u001b[0m, in \u001b[0;36m_StateChange.declare_states.._go\u001b[0;34m(fn, self, *arg, **kw)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_next_state \u001b[38;5;241m=\u001b[39m _StateChangeStates\u001b[38;5;241m.\u001b[39mCHANGE_IN_PROGRESS\n\u001b[1;32m 138\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 139\u001b[0m ret_value \u001b[38;5;241m=\u001b[39m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m:\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1256\u001b[0m, in \u001b[0;36mSessionTransaction.commit\u001b[0;34m(self, _to_root)\u001b[0m\n\u001b[1;32m 1254\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m SessionTransactionState\u001b[38;5;241m.\u001b[39mPREPARED:\n\u001b[1;32m 1255\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expect_state(SessionTransactionState\u001b[38;5;241m.\u001b[39mPREPARED):\n\u001b[0;32m-> 1256\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_prepare_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1258\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_parent \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnested:\n\u001b[1;32m 1259\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m conn, trans, should_commit, autoclose \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mset\u001b[39m(\n\u001b[1;32m 1260\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connections\u001b[38;5;241m.\u001b[39mvalues()\n\u001b[1;32m 1261\u001b[0m ):\n", - "File \u001b[0;32m:2\u001b[0m, in \u001b[0;36m_prepare_impl\u001b[0;34m(self)\u001b[0m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state_changes.py:139\u001b[0m, in \u001b[0;36m_StateChange.declare_states.._go\u001b[0;34m(fn, self, *arg, **kw)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_next_state \u001b[38;5;241m=\u001b[39m _StateChangeStates\u001b[38;5;241m.\u001b[39mCHANGE_IN_PROGRESS\n\u001b[1;32m 138\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 139\u001b[0m ret_value \u001b[38;5;241m=\u001b[39m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m:\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1231\u001b[0m, in \u001b[0;36mSessionTransaction._prepare_impl\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1229\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msession\u001b[38;5;241m.\u001b[39m_is_clean():\n\u001b[1;32m 1230\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[0;32m-> 1231\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mflush\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1232\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1233\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc\u001b[38;5;241m.\u001b[39mFlushError(\n\u001b[1;32m 1234\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOver 100 subsequent flushes have occurred within \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1235\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession.commit() - is an after_flush() hook \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1236\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcreating new objects?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1237\u001b[0m )\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4312\u001b[0m, in \u001b[0;36mSession.flush\u001b[0;34m(self, objects)\u001b[0m\n\u001b[1;32m 4310\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 4311\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_flushing \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m-> 4312\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_flush\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobjects\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4313\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 4314\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_flushing \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4448\u001b[0m, in \u001b[0;36mSession._flush\u001b[0;34m(self, objects)\u001b[0m\n\u001b[1;32m 4446\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m:\n\u001b[1;32m 4447\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m util\u001b[38;5;241m.\u001b[39msafe_reraise():\n\u001b[0;32m-> 4448\u001b[0m transaction\u001b[38;5;241m.\u001b[39mrollback(_capture_exception\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:146\u001b[0m, in \u001b[0;36msafe_reraise.__exit__\u001b[0;34m(self, type_, value, traceback)\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m exc_value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exc_info \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;66;03m# remove potential circular references\u001b[39;00m\n\u001b[0;32m--> 146\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc_value\u001b[38;5;241m.\u001b[39mwith_traceback(exc_tb)\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exc_info \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;66;03m# remove potential circular references\u001b[39;00m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4408\u001b[0m, in \u001b[0;36mSession._flush\u001b[0;34m(self, objects)\u001b[0m\n\u001b[1;32m 4406\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_warn_on_events \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 4407\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 4408\u001b[0m \u001b[43mflush_context\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4409\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 4410\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_warn_on_events \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:466\u001b[0m, in \u001b[0;36mUOWTransaction.execute\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 464\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 465\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m rec \u001b[38;5;129;01min\u001b[39;00m topological\u001b[38;5;241m.\u001b[39msort(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdependencies, postsort_actions):\n\u001b[0;32m--> 466\u001b[0m \u001b[43mrec\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:591\u001b[0m, in \u001b[0;36mProcessAll.execute\u001b[0;34m(self, uow)\u001b[0m\n\u001b[1;32m 589\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdependency_processor\u001b[38;5;241m.\u001b[39mprocess_deletes(uow, states)\n\u001b[1;32m 590\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 591\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdependency_processor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess_saves\u001b[49m\u001b[43m(\u001b[49m\u001b[43muow\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstates\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:602\u001b[0m, in \u001b[0;36mOneToManyDP.process_saves\u001b[0;34m(self, uowcommit, states)\u001b[0m\n\u001b[1;32m 596\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m child \u001b[38;5;129;01min\u001b[39;00m history\u001b[38;5;241m.\u001b[39mdeleted:\n\u001b[1;32m 597\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 598\u001b[0m should_null_fks\n\u001b[1;32m 599\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcascade\u001b[38;5;241m.\u001b[39mdelete_orphan\n\u001b[1;32m 600\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhasparent(child)\n\u001b[1;32m 601\u001b[0m ):\n\u001b[0;32m--> 602\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_synchronize\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 603\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchild\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muowcommit\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 604\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 606\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_pks_changed(uowcommit, state):\n\u001b[1;32m 607\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m child \u001b[38;5;129;01min\u001b[39;00m history\u001b[38;5;241m.\u001b[39munchanged:\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/dependency.py:623\u001b[0m, in \u001b[0;36mOneToManyDP._synchronize\u001b[0;34m(self, state, child, associationrow, clearkeys, uowcommit, pks_changed)\u001b[0m\n\u001b[1;32m 621\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m clearkeys:\n\u001b[0;32m--> 623\u001b[0m \u001b[43msync\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclear\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msynchronize_pairs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 624\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 625\u001b[0m sync\u001b[38;5;241m.\u001b[39mpopulate(\n\u001b[1;32m 626\u001b[0m source,\n\u001b[1;32m 627\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mparent,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 632\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpassive_updates \u001b[38;5;129;01mand\u001b[39;00m pks_changed,\n\u001b[1;32m 633\u001b[0m )\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/sync.py:88\u001b[0m, in \u001b[0;36mclear\u001b[0;34m(dest, dest_mapper, synchronize_pairs)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m l, r \u001b[38;5;129;01min\u001b[39;00m synchronize_pairs:\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 84\u001b[0m r\u001b[38;5;241m.\u001b[39mprimary_key\n\u001b[1;32m 85\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m dest_mapper\u001b[38;5;241m.\u001b[39m_get_state_attr_by_column(dest, dest\u001b[38;5;241m.\u001b[39mdict, r)\n\u001b[1;32m 86\u001b[0m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m orm_util\u001b[38;5;241m.\u001b[39m_none_set\n\u001b[1;32m 87\u001b[0m ):\n\u001b[0;32m---> 88\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAssertionError\u001b[39;00m(\n\u001b[1;32m 89\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDependency rule on column \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00ml\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtried to blank-out primary key \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumn \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m on instance \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00morm_util\u001b[38;5;241m.\u001b[39mstate_str(dest)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 92\u001b[0m )\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 94\u001b[0m dest_mapper\u001b[38;5;241m.\u001b[39m_set_state_attr_by_column(dest, dest\u001b[38;5;241m.\u001b[39mdict, r, \u001b[38;5;28;01mNone\u001b[39;00m)\n", - "\u001b[0;31mAssertionError\u001b[0m: Dependency rule on column 'cmf__ddupes.sha1' tried to blank-out primary key column 'cmf__ddupe_probabilities.ddupe' on instance ''" - ] - } - ], - "source": [ - "ew_deduped.to_cmf()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "54a0284b-f51f-4435-a3ea-c8760d2c78bf", - "metadata": {}, - "outputs": [], - "source": [ - "ew_clusters.to_cmf()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "6ca24314-d92a-4b35-812c-4a3b20085249", - "metadata": {}, - "outputs": [], - "source": [ - "from cmf.data import Models, ENGINE, DDupeProbabilities, Dedupes, SourceData, Clusters, clusters_association\n", - "from sqlalchemy.orm import Session\n", - "from sqlalchemy import delete, select, values, column, LargeBinary\n", - "from sqlalchemy.dialects.postgresql import insert" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "e4308d24-65be-4477-af34-99c76225dd22", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "160031\n", - "160031\n" - ] - } - ], - "source": [ - "with Session(ENGINE) as session:\n", - " dd_n = session.query(Dedupes).count()\n", - " ddp_n = session.query(DDupeProbabilities).count()\n", - "print(dd_n)\n", - "print(ddp_n)" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "2fe2dc58-b4e9-4f0c-9891-d180bd9db3bf", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "160034\n", - "21\n" - ] - } - ], - "source": [ - "with Session(ENGINE) as session:\n", - " dd_n = session.query(Dedupes).count()\n", - " ddp_n = session.query(DDupeProbabilities).count()\n", - "print(dd_n)\n", - "print(ddp_n)" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "a95bbee9-a1fa-4b10-9081-259760aa4e39", - "metadata": {}, - "outputs": [], - "source": [ - "with Session(ENGINE) as session:\n", - " model = session.query(Models).first()\n", - " subq = (\n", - " model\n", - " .proposes_dedupes\n", - " .select()\n", - " .with_only_columns(DDupeProbabilities.model)\n", - " )\n", - " session.execute(\n", - " delete(DDupeProbabilities)\n", - " .where(DDupeProbabilities.model.in_(subq))\n", - " )\n", - " session.commit()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "a7b60736-805b-4e07-a5aa-51f3110d7e3f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[b'\\x15\\xc9XO\\xbf\\xac\\x82o\\xd4)\\xbb\\x15Y\\x8bp?\\xf5\\x9a\\x8a\\xda', b'I\\xd7\\xd6G\\xa5\\x93)\\x90\\x8a\\x0c\\x8b\\xece\\x02}7\\x16\\x81\\x04\\x1a', b'\\x04-\\x85\\x1f\\xbcy\\xe5\\xbe\\x00\\x8a\\x1d!\\x84\\xfdr\\xa0\\xbb\\r\\xbf\\x7f', b'\\xcc%\\x11\\xba\"\\xfe\\x12v\\n\\xc7\\x14>B\\'\\xa1\\xd1\\xac\\xa5\\x0e\\x99', b'2\\\\\\x87\\x9a\\xd8k\\xf4\\xcb\\x1dV\\xeb\\x95I\\x89~\\xc3r\\xb1\\xc0i', b\"\\x15]u\\xa8\\x15\\xabEQ\\xbd1\\xa0\\x0b'8\\xd6Iy]$\\x11\", b'\\x8e\\xa4H\\x1b\\xed\\xc7K\\x1e\\xfe\\x80\\xed>\\xe0R\\xac.\\xd4\\x90_r', b',\\xca\\xc4}\\x9d\\x08\\xdf\\xf7;\\xd3@\\x8ck2\\xde\\x8e\\xb3\\xd1\\xb6M', b'\\xfb\\xc2\\xcd\\xd5\\xbb\\x8d\\x1b84\\x11\\xe4f\\xa9h\\x0b\\x9b!\\xe1\\x0c\\x1e', b'9\\xa1\\xc2\\x17\\x0c\\xfaj\\xa7\\xbe\\x8d\\x0eW\\xd9\\xaf\\x19\\xadc\\x9b\\xb2\"']\n" - ] - } - ], - "source": [ - "with Session(ENGINE) as session:\n", - " data_sample = session.query(SourceData).limit(10).all()\n", - "\n", - "print([d.sha1 for d in data_sample])" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "d227ce4d-455d-4e7b-8d6a-893055d014a4", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[, , , ]\n", - "[0.9, 0.3, 0.1, 0.1]\n" - ] - } - ], - "source": [ - "probabilities_to_add = [\n", - " {\n", - " \"sha1\": b\"123\", \n", - " \"left\": data_sample[0].sha1, \n", - " \"right\": data_sample[1].sha1,\n", - " \"probability\": 0.9\n", - " },\n", - " {\n", - " \"sha1\": b\"456\", \n", - " \"left\": data_sample[2].sha1, \n", - " \"right\": data_sample[3].sha1,\n", - " \"probability\": 0.3\n", - " },\n", - " {\n", - " \"sha1\": b\"789\", \n", - " \"left\": data_sample[4].sha1, \n", - " \"right\": data_sample[5].sha1,\n", - " \"probability\": 0.1\n", - " },\n", - " {\n", - " \"sha1\": b\"987\", \n", - " \"left\": data_sample[6].sha1, \n", - " \"right\": data_sample[7].sha1,\n", - " \"probability\": 0.1\n", - " },\n", - "]\n", - "\n", - "with Session(ENGINE) as session:\n", - " # Get model\n", - " model = session.query(Models).first()\n", - "\n", - " # Clear old model probabilities\n", - " old_ddupe_probs_subquery = (\n", - " model\n", - " .proposes_dedupes\n", - " .select()\n", - " .with_only_columns(DDupeProbabilities.model)\n", - " )\n", - " \n", - " session.execute(\n", - " delete(DDupeProbabilities)\n", - " .where(DDupeProbabilities.model.in_(old_ddupe_probs_subquery))\n", - " )\n", - "\n", - " session.commit()\n", - "\n", - " # Insert any new dedupe nodes\n", - " session.execute(\n", - " insert(Dedupes)\n", - " .on_conflict_do_nothing(\n", - " index_elements=[Dedupes.sha1]\n", - " )\n", - " .returning(Dedupes),\n", - " probabilities_to_add\n", - " )\n", - "\n", - " # Get all relevant dedupe nodes\n", - " ddupes_to_add_cte = (\n", - " values(\n", - " column(\"sha1\", LargeBinary),\n", - " name=\"sha_dedupe_cte\"\n", - " ).data([(dd[\"sha1\"],) for dd in probabilities_to_add])\n", - " )\n", - " \n", - " ddupes = (\n", - " session.query(Dedupes)\n", - " .join(ddupes_to_add_cte, ddupes_to_add_cte.c.sha1 == Dedupes.sha1)\n", - " .all()\n", - " )\n", - "\n", - " print(ddupes)\n", - "\n", - " # Attach probabilities to create dedupe probability nodes\n", - " ddupe_probs = []\n", - " for dd, data in zip(ddupes, probabilities_to_add):\n", - " p = DDupeProbabilities(probability=data[\"probability\"])\n", - " p.dedupes = dd\n", - " ddupe_probs.append(p)\n", - "\n", - " print([dd.probability for dd in ddupe_probs])\n", - "\n", - " # Attach new probabilities\n", - " model.proposes_dedupes.add_all(ddupe_probs)\n", - " \n", - " session.commit()" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "628b88aa-8157-433e-917d-6c33e2737672", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4\n" - ] - } - ], - "source": [ - "from sqlalchemy import func\n", - "with Session(ENGINE) as session:\n", - " model = session.query(Models).first()\n", - " x = session.scalar(model.proposes_dedupes.select().with_only_columns(func.count()))\n", - " print(x)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "cc7b1714-1a7e-47cf-b1a3-f957f6549c18", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0\n", - "0\n" - ] - } - ], - "source": [ - "from sqlalchemy import func\n", - "with Session(ENGINE) as session:\n", - " m0 = session.query(Models).all()[0]\n", - " m1 = session.query(Models).all()[1]\n", - " \n", - " n0 = session.scalar(\n", - " m0.creates.select().with_only_columns(func.count())\n", - " )\n", - " n1 = session.scalar(\n", - " m1.creates.select().with_only_columns(func.count())\n", - " )\n", - " print(n0)\n", - " print(n1)" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "6ba7b0b8-7bc8-473b-b478-b822dd003781", - "metadata": {}, - "outputs": [], - "source": [ - "with Session(ENGINE) as session:\n", - " # Get model\n", - " m0 = session.query(Models).all()[0]\n", - " m1 = session.query(Models).all()[1]\n", - " # Get clusters\n", - " clusters = session.query(Clusters).limit(10).all()\n", - " # Add 'em\n", - " # m0.creates.add_all(clusters)\n", - " session.commit()\n", - " \n", - " m1.creates.add_all(clusters)\n", - " \n", - " session.commit()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "8f91a6ba-842e-4e07-a521-dcbc8f1bf483", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[autoreload of cmf.data.models failed: Traceback (most recent call last):\n", - " File \"/opt/conda/envs/company_matching/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 276, in check\n", - " superreload(m, reload, self.old_objects)\n", - " File \"/opt/conda/envs/company_matching/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 475, in superreload\n", - " module = reload(module)\n", - " File \"/opt/conda/envs/company_matching/lib/python3.9/importlib/__init__.py\", line 169, in reload\n", - " _bootstrap._exec(spec, module)\n", - " File \"\", line 613, in _exec\n", - " File \"\", line 846, in exec_module\n", - " File \"\", line 983, in get_code\n", - " File \"\", line 913, in source_to_code\n", - " File \"\", line 228, in _call_with_frames_removed\n", - " File \"/home/jovyan/company-matching/cmf/data/models.py\", line 67\n", - " def creates_count() ->\n", - " ^\n", - "SyntaxError: invalid syntax\n", - "]\n" - ] - } - ], - "source": [ - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "580f8529-6e4d-41b7-8346-5211fc54b66c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "from sqlalchemy import func\n", - "with Session(ENGINE) as session:\n", - " m0 = session.query(Models).all()[0]\n", - " m1 = session.query(Models).all()[1]\n", - " \n", - " x = m0.creates.select().with_only_columns(func.count())\n", - " print(type(x))" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "1383fdd6-927a-4920-8760-4c9758bec256", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0\n", - "10\n" - ] - } - ], - "source": [ - "from sqlalchemy import func\n", - "with Session(ENGINE) as session:\n", - " m0 = session.query(Models).all()[0]\n", - " m1 = session.query(Models).all()[1]\n", - " \n", - " n0 = session.scalar(\n", - " m0.creates.select().with_only_columns(func.count())\n", - " )\n", - " n1 = session.scalar(\n", - " m1.creates.select().with_only_columns(func.count())\n", - " )\n", - " print(n0)\n", - " print(n1)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "2c786917-7e60-4520-a14f-e0301760444a", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'Session' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mSession\u001b[49m(ENGINE) \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[1;32m 2\u001b[0m cl_n \u001b[38;5;241m=\u001b[39m session\u001b[38;5;241m.\u001b[39mquery(Clusters)\u001b[38;5;241m.\u001b[39mcount()\n\u001b[1;32m 3\u001b[0m cla_n \u001b[38;5;241m=\u001b[39m session\u001b[38;5;241m.\u001b[39mquery(clusters_association)\u001b[38;5;241m.\u001b[39mcount()\n", - "\u001b[0;31mNameError\u001b[0m: name 'Session' is not defined" - ] - } - ], - "source": [ - "with Session(ENGINE) as session:\n", - " cl_n = session.query(Clusters).count()\n", - " cla_n = session.query(clusters_association).count()\n", - "print(cl_n)\n", - "print(cla_n)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "eee63e34-416c-4256-b4f0-e35bb8dfaf24", - "metadata": {}, - "outputs": [], - "source": [ - "with Session(ENGINE) as session:\n", - " # Get model\n", - " model = session.query(Models).first()\n", - " subq = model.creates.select().with_only_columns(Clusters.sha1)\n", - " session.execute(\n", - " delete(clusters_association)\n", - " .where(clusters_association.c.child.in_(subq))\n", - " )\n", - " session.commit()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "0233979d-93b1-4056-954d-64d0e5291ca6", - "metadata": {}, - "outputs": [], - "source": [ - "from cmf.data import Models, Dedupes, DDupeProbabilities, ENGINE\n", - "from sqlalchemy.orm import Session" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "e9d944bf-5fdf-4fc5-828f-4825bdf4029c", - "metadata": {}, - "outputs": [], - "source": [ - "m = Models(sha1=b\"123\")\n", - "p = DDupeProbabilities(probability=0.5)\n", - "p.dedupes = Dedupes()\n", - "m.proposes_dedupes.append(p)\n", - "\n", - "# with Session(ENGINE) as session:" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/engineering/WL_query.ipynb b/notebooks/engineering/WL_query.ipynb deleted file mode 100644 index 5fb2995..0000000 --- a/notebooks/engineering/WL_query.ipynb +++ /dev/null @@ -1,1097 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "d8019f5c-5446-46fa-90d3-b5db28541001", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame, display\n", - "InteractiveShell.ast_node_interactivity = \"all\"\n", - "# pip install dwutils@git+ssh://git@gitlab.data.trade.gov.uk/ddatdatascienceteam/data-workspace-utilities.git@latest" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "6c751528-6238-4f29-a9e4-79bf167d8308", - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "\n", - "# import connectorx as cx\n", - "from pandas import DataFrame\n", - "import pandas as pd\n", - "from typing import Optional, Dict, List\n", - "from sqlglot import parse_one\n", - "import time\n", - "from datetime import timedelta\n", - "\n", - "from sqlalchemy import select, Engine\n", - "from sqlalchemy.dialects import postgresql\n", - "\n", - "import cmf\n", - "from cmf.helpers import selector\n", - "from cmf.data.utils import sqa_profiled\n", - "from cmf.helpers.selector import _parent_to_tree, _tree_to_reachable_stmt, _reachable_to_parent_data_stmt, _selector_to_data\n", - "from cmf.data import ENGINE\n", - "\n", - "def create_cmf_pipelines_logger() -> logging.Logger:\n", - " pipeline_logger = logging.getLogger(\"cmf_pipelines\")\n", - " logic_logger = logging.getLogger(\"cmf_logic\")\n", - "\n", - " pipeline_logger.setLevel(logging.INFO)\n", - " logic_logger.setLevel(logging.INFO)\n", - "\n", - " handler = logging.StreamHandler()\n", - " formatter = logging.Formatter(\n", - " \"[%(asctime)s: %(levelname)s] %(name)s %(module)s: %(message)s\"\n", - " )\n", - " handler.setFormatter(formatter)\n", - "\n", - " pipeline_logger.addHandler(handler)\n", - " logic_logger.addHandler(handler)\n", - "\n", - " return pipeline_logger\n", - "\n", - "logger = create_cmf_pipelines_logger()" - ] - }, - { - "cell_type": "markdown", - "id": "f701e3de-ee2a-4a61-b764-af9d3f34e91b", - "metadata": {}, - "source": [ - "# Speeding up queries\n", - "\n", - "Everything is slower than I thought. Let's profile and optimise.\n", - "\n", - "Let's compile the SQL for three tables so we've got points to compare and contrast." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# This is just the relevant innards of query()\n", - "\n", - "def compile_query_to_postgresql(\n", - " selector: Dict[str, List[str]],\n", - " model: str,\n", - " engine: Engine = ENGINE,\n", - ") -> str:\n", - " parent, child = _parent_to_tree(model, engine=engine)\n", - "\n", - " if len(parent) == 0:\n", - " raise ValueError(f\"Model {model} not found\")\n", - "\n", - " tree = [parent] + child\n", - " reachable_stmt = _tree_to_reachable_stmt(tree)\n", - " lookup_stmt = _reachable_to_parent_data_stmt(reachable_stmt, parent)\n", - " data_stmt = _selector_to_data(selector, engine=engine).cte()\n", - "\n", - " final_stmt = select(lookup_stmt.c.parent.label(\"cluster_sha1\"), data_stmt).join(\n", - " lookup_stmt, lookup_stmt.c.child == data_stmt.c.data_sha1\n", - " )\n", - "\n", - " with ENGINE.connect() as conn:\n", - " cursor = conn.connection.cursor()\n", - " compiled = final_stmt.compile(\n", - " dialect=postgresql.dialect(),\n", - " compile_kwargs={\"render_postcompile\": True}\n", - " )\n", - " compiled_bound = cursor.mogrify(str(compiled), compiled.params)\n", - " sql = parse_one(compiled_bound.decode(\"utf-8\"))\n", - "\n", - " return sql.sql(dialect=\"postgres\", pretty=True)" - ] - }, - { - "source": [ - "## 🔴 Data Hub companies\n", - "\n", - "\n", - "Weirdly slow for 500k records. Times out.\n", - "\n", - "...and now doesn't?! This was proper breaking last week!" - ], - "cell_type": "markdown", - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "WITH RECURSIVE source_data_unnested AS (\n SELECT\n _team_cmf.cmf__source_data.sha1 AS sha1,\n UNNEST(_team_cmf.cmf__source_data.id) AS id,\n _team_cmf.cmf__source_data.dataset AS dataset\n FROM _team_cmf.cmf__source_data\n), anon_1 AS (\n SELECT\n source_data_unnested.sha1 AS data_sha1,\n dbt.data_hub__companies.name AS dbt_data_hub__companies_name,\n dbt.data_hub__companies.company_number AS dbt_data_hub__companies_company_number,\n dbt.data_hub__companies.address_postcode AS dbt_data_hub__companies_address_postcode\n FROM source_data_unnested\n LEFT OUTER JOIN dbt.data_hub__companies\n ON source_data_unnested.id = CAST(dbt.data_hub__companies.id AS VARCHAR)\n AND source_data_unnested.dataset = CAST(CAST('60f65644-8990-4fcc-b0c3-555cbd284b7d' AS UUID) AS UUID)\n WHERE\n NOT dbt.data_hub__companies.id IS NULL\n), allowed AS (\n SELECT\n _team_cmf.cmf__ddupe_contains.parent AS parent,\n _team_cmf.cmf__ddupe_contains.child AS child\n FROM _team_cmf.cmf__ddupe_contains\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_1\n ON _team_cmf.cmf__ddupe_contains.parent = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 IN (CAST('\\xa1b6d0eaf9115726b371548db2f97ee99af64854' AS BYTEA))\n UNION\n SELECT\n _team_cmf.cmf__link_contains.parent AS parent,\n _team_cmf.cmf__link_contains.child AS child\n FROM _team_cmf.cmf__link_contains\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_1\n ON _team_cmf.cmf__link_contains.parent = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_2\n ON _team_cmf.cmf__link_contains.child = cmf__clusters_2.sha1\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 IN (CAST('\\xa1b6d0eaf9115726b371548db2f97ee99af64854' AS BYTEA))\n), root AS (\n SELECT\n allowed.parent AS parent,\n allowed.child AS child\n FROM allowed\n JOIN _team_cmf.cmf__clusters\n ON _team_cmf.cmf__clusters.sha1 = allowed.parent\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = _team_cmf.cmf__clusters.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 = CAST('\\xa1b6d0eaf9115726b371548db2f97ee99af64854' AS BYTEA)\n), recurse(parent, child) AS (\n SELECT\n root.parent AS parent,\n root.child AS child\n FROM root\n UNION\n SELECT\n recurse.parent AS parent,\n allowed.child AS child\n FROM allowed\n JOIN recurse\n ON allowed.parent = recurse.child\n)\nSELECT\n recurse.parent AS cluster_sha1,\n anon_1.data_sha1,\n anon_1.dbt_data_hub__companies_name,\n anon_1.dbt_data_hub__companies_company_number,\n anon_1.dbt_data_hub__companies_address_postcode\nFROM anon_1\nJOIN recurse\n ON recurse.child = anon_1.data_sha1\n" - } - ], - "source": [ - "my_selector = selector(\n", - " table=\"dbt.data_hub__companies\",\n", - " fields=[\"name\", \"company_number\", \"address_postcode\"],\n", - ")\n", - "my_model = \"naive_data_hub_v1\"\n", - "\n", - "compiled = compile_query_to_postgresql(selector=my_selector, model=my_model)\n", - "print(compiled)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Total time: 0:01:45.517325\n" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": " cluster_sha1 \\\n0 b\"B\\xae\\\\4\\x83\\xe8\\xad#\\x91z'\\xa3\\x0e\\xbb#\\xb1... \n1 b'K\\xfa\\xb4\\xb9\\xac\\xbe\\x8e\\x8c\\xdd\\x12\\x0e\\xd... \n2 b'\\\\\\xe1WW\\x97\\x06$\\x9eV=12h\\xce\\x7f:\\xaa\\x9a\\... \n\n data_sha1 \\\n0 b\"B\\xae\\\\4\\x83\\xe8\\xad#\\x91z'\\xa3\\x0e\\xbb#\\xb1... \n1 b'K\\xfa\\xb4\\xb9\\xac\\xbe\\x8e\\x8c\\xdd\\x12\\x0e\\xd... \n2 b'\\\\\\xe1WW\\x97\\x06$\\x9eV=12h\\xce\\x7f:\\xaa\\x9a\\... \n\n dbt_data_hub__companies_name \\\n0 National Star Centre For Disabled Youth Ltd \n1 HAWKESBURY CONSULTING LIMITED \n2 BIRMINGHAM WOMENS AND CHILDRENS NHS FOUNDATION... \n\n dbt_data_hub__companies_company_number \\\n0 \n1 06736356 \n2 \n\n dbt_data_hub__companies_address_postcode \n0 GL53 9QU \n1 CB24 4UQ \n2 B4 6NH ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cluster_sha1data_sha1dbt_data_hub__companies_namedbt_data_hub__companies_company_numberdbt_data_hub__companies_address_postcode
0b\"B\\xae\\\\4\\x83\\xe8\\xad#\\x91z'\\xa3\\x0e\\xbb#\\xb1...b\"B\\xae\\\\4\\x83\\xe8\\xad#\\x91z'\\xa3\\x0e\\xbb#\\xb1...National Star Centre For Disabled Youth Ltd<NA>GL53 9QU
1b'K\\xfa\\xb4\\xb9\\xac\\xbe\\x8e\\x8c\\xdd\\x12\\x0e\\xd...b'K\\xfa\\xb4\\xb9\\xac\\xbe\\x8e\\x8c\\xdd\\x12\\x0e\\xd...HAWKESBURY CONSULTING LIMITED06736356CB24 4UQ
2b'\\\\\\xe1WW\\x97\\x06$\\x9eV=12h\\xce\\x7f:\\xaa\\x9a\\...b'\\\\\\xe1WW\\x97\\x06$\\x9eV=12h\\xce\\x7f:\\xaa\\x9a\\...BIRMINGHAM WOMENS AND CHILDRENS NHS FOUNDATION...<NA>B4 6NH
\n
" - }, - "metadata": {}, - "execution_count": 19 - } - ], - "source": [ - "start = time.time()\n", - "\n", - "df = cmf.query(\n", - " selector=my_selector, return_type=\"pandas\", model=my_model\n", - ")\n", - "\n", - "elapsed = time.time() - start\n", - "print(f\"Total time: {timedelta(seconds=elapsed)}\")\n", - "df.head(3)" - ] - }, - { - "source": [ - "## 🟡 Export wins\n", - "\n", - "50k records, takes about a minute. Slower than you'd hope and seems to share a query plan with Data Hub, but is small enough it doesn't matter." - ], - "cell_type": "markdown", - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "WITH RECURSIVE source_data_unnested AS (\n SELECT\n _team_cmf.cmf__source_data.sha1 AS sha1,\n UNNEST(_team_cmf.cmf__source_data.id) AS id,\n _team_cmf.cmf__source_data.dataset AS dataset\n FROM _team_cmf.cmf__source_data\n), anon_1 AS (\n SELECT\n source_data_unnested.sha1 AS data_sha1,\n dbt.export_wins__wins_dataset.company_name AS dbt_export_wins__wins_dataset_company_name,\n dbt.export_wins__wins_dataset.cdms_reference AS dbt_export_wins__wins_dataset_cdms_reference\n FROM source_data_unnested\n LEFT OUTER JOIN dbt.export_wins__wins_dataset\n ON source_data_unnested.id = CAST(dbt.export_wins__wins_dataset.id AS VARCHAR)\n AND source_data_unnested.dataset = CAST(CAST('cc89099f-d065-49cc-aa45-e08e1db6653a' AS UUID) AS UUID)\n WHERE\n NOT dbt.export_wins__wins_dataset.id IS NULL\n), allowed AS (\n SELECT\n _team_cmf.cmf__ddupe_contains.parent AS parent,\n _team_cmf.cmf__ddupe_contains.child AS child\n FROM _team_cmf.cmf__ddupe_contains\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_1\n ON _team_cmf.cmf__ddupe_contains.parent = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 IN (CAST('\\x17a46e0ea365597922a07f6b7bb01b9956807b21' AS BYTEA))\n UNION\n SELECT\n _team_cmf.cmf__link_contains.parent AS parent,\n _team_cmf.cmf__link_contains.child AS child\n FROM _team_cmf.cmf__link_contains\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_1\n ON _team_cmf.cmf__link_contains.parent = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_2\n ON _team_cmf.cmf__link_contains.child = cmf__clusters_2.sha1\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 IN (CAST('\\x17a46e0ea365597922a07f6b7bb01b9956807b21' AS BYTEA))\n), root AS (\n SELECT\n allowed.parent AS parent,\n allowed.child AS child\n FROM allowed\n JOIN _team_cmf.cmf__clusters\n ON _team_cmf.cmf__clusters.sha1 = allowed.parent\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = _team_cmf.cmf__clusters.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 = CAST('\\x17a46e0ea365597922a07f6b7bb01b9956807b21' AS BYTEA)\n), recurse(parent, child) AS (\n SELECT\n root.parent AS parent,\n root.child AS child\n FROM root\n UNION\n SELECT\n recurse.parent AS parent,\n allowed.child AS child\n FROM allowed\n JOIN recurse\n ON allowed.parent = recurse.child\n)\nSELECT\n recurse.parent AS cluster_sha1,\n anon_1.data_sha1,\n anon_1.dbt_export_wins__wins_dataset_company_name,\n anon_1.dbt_export_wins__wins_dataset_cdms_reference\nFROM anon_1\nJOIN recurse\n ON recurse.child = anon_1.data_sha1\n" - } - ], - "source": [ - "my_selector = selector(\n", - " table=\"dbt.export_wins__wins_dataset\",\n", - " fields=[\"company_name\", \"cdms_reference\"],\n", - ")\n", - "my_model = \"naive_export_wins_v1\"\n", - "\n", - "compiled = compile_query_to_postgresql(selector=my_selector, model=my_model)\n", - "print(compiled)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Total time: 0:00:22.835470\n" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": " cluster_sha1 \\\n0 b'\\x02\\xd3\\xc3\\xfb\\x01KnG\\xc9,\\x07\\xb1\\xc1\\x11... \n1 b'\\x04\\xdfY\\xad\\xadtT\\x1b\\xed\\xfd\\x06w\\xe9J\\xf... \n2 b'\\x06\\xc1S\\xb5p\\x88SZ\\xbcV\\xd0a\\xfbT\\xad\\xd3g... \n\n data_sha1 \\\n0 b'\\x02\\xd3\\xc3\\xfb\\x01KnG\\xc9,\\x07\\xb1\\xc1\\x11... \n1 b'&\\x04\\x9a\\xda~v\\xbeu?F\\xf0\\xfd\\x92\\xa7IP\\xfa... \n2 b'\\x8cV\\xb8[\\xac\\xa6K,]\\xb1\\x96\\xbf\\xfe\\x1a\\x9... \n\n dbt_export_wins__wins_dataset_company_name \\\n0 ETA Green Power Limited \n1 Med-Eq (Europe) Ltd \n2 Silver Lined Horizons Ltd \n\n dbt_export_wins__wins_dataset_cdms_reference \n0 Companies House ref: 12359858 \n1 ORG-10109781 \n2 ORG-10170829 ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cluster_sha1data_sha1dbt_export_wins__wins_dataset_company_namedbt_export_wins__wins_dataset_cdms_reference
0b'\\x02\\xd3\\xc3\\xfb\\x01KnG\\xc9,\\x07\\xb1\\xc1\\x11...b'\\x02\\xd3\\xc3\\xfb\\x01KnG\\xc9,\\x07\\xb1\\xc1\\x11...ETA Green Power LimitedCompanies House ref: 12359858
1b'\\x04\\xdfY\\xad\\xadtT\\x1b\\xed\\xfd\\x06w\\xe9J\\xf...b'&\\x04\\x9a\\xda~v\\xbeu?F\\xf0\\xfd\\x92\\xa7IP\\xfa...Med-Eq (Europe) LtdORG-10109781
2b'\\x06\\xc1S\\xb5p\\x88SZ\\xbcV\\xd0a\\xfbT\\xad\\xd3g...b'\\x8cV\\xb8[\\xac\\xa6K,]\\xb1\\x96\\xbf\\xfe\\x1a\\x9...Silver Lined Horizons LtdORG-10170829
\n
" - }, - "metadata": {}, - "execution_count": 15 - } - ], - "source": [ - "start = time.time()\n", - "\n", - "df = cmf.query(\n", - " selector=my_selector, return_type=\"pandas\", model=my_model\n", - ")\n", - "\n", - "elapsed = time.time() - start\n", - "print(f\"Total time: {timedelta(seconds=elapsed)}\")\n", - "df.head(3)" - ] - }, - { - "source": [ - "## 🟢 Companies House\n", - "\n", - "5.5m records, takes about 3 minutes. Weirdless fast -- query plan indicates hash joins. Why does this work well" - ], - "cell_type": "markdown", - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "WITH RECURSIVE source_data_unnested AS (\n SELECT\n _team_cmf.cmf__source_data.sha1 AS sha1,\n UNNEST(_team_cmf.cmf__source_data.id) AS id,\n _team_cmf.cmf__source_data.dataset AS dataset\n FROM _team_cmf.cmf__source_data\n), anon_1 AS (\n SELECT\n source_data_unnested.sha1 AS data_sha1,\n companieshouse.companies.company_name AS companieshouse_companies_company_name,\n companieshouse.companies.company_number AS companieshouse_companies_company_number,\n companieshouse.companies.postcode AS companieshouse_companies_postcode\n FROM source_data_unnested\n LEFT OUTER JOIN companieshouse.companies\n ON source_data_unnested.id = CAST(companieshouse.companies.id AS VARCHAR)\n AND source_data_unnested.dataset = CAST(CAST('592b69e0-ce95-47a6-9f0a-bcd792f214a4' AS UUID) AS UUID)\n WHERE\n NOT companieshouse.companies.id IS NULL\n), allowed AS (\n SELECT\n _team_cmf.cmf__ddupe_contains.parent AS parent,\n _team_cmf.cmf__ddupe_contains.child AS child\n FROM _team_cmf.cmf__ddupe_contains\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_1\n ON _team_cmf.cmf__ddupe_contains.parent = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 IN (CAST('\\x5666a21720152c92b6b89be7d61e336d4ca684bf' AS BYTEA))\n UNION\n SELECT\n _team_cmf.cmf__link_contains.parent AS parent,\n _team_cmf.cmf__link_contains.child AS child\n FROM _team_cmf.cmf__link_contains\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_1\n ON _team_cmf.cmf__link_contains.parent = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_2\n ON _team_cmf.cmf__link_contains.child = cmf__clusters_2.sha1\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 IN (CAST('\\x5666a21720152c92b6b89be7d61e336d4ca684bf' AS BYTEA))\n), root AS (\n SELECT\n allowed.parent AS parent,\n allowed.child AS child\n FROM allowed\n JOIN _team_cmf.cmf__clusters\n ON _team_cmf.cmf__clusters.sha1 = allowed.parent\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = _team_cmf.cmf__clusters.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 = CAST('\\x5666a21720152c92b6b89be7d61e336d4ca684bf' AS BYTEA)\n), recurse(parent, child) AS (\n SELECT\n root.parent AS parent,\n root.child AS child\n FROM root\n UNION\n SELECT\n recurse.parent AS parent,\n allowed.child AS child\n FROM allowed\n JOIN recurse\n ON allowed.parent = recurse.child\n)\nSELECT\n recurse.parent AS cluster_sha1,\n anon_1.data_sha1,\n anon_1.companieshouse_companies_company_name,\n anon_1.companieshouse_companies_company_number,\n anon_1.companieshouse_companies_postcode\nFROM anon_1\nJOIN recurse\n ON recurse.child = anon_1.data_sha1\n" - } - ], - "source": [ - "my_selector = selector(\n", - " table=\"companieshouse.companies\",\n", - " fields=[\"company_name\", \"company_number\", \"postcode\"],\n", - ")\n", - "my_model = \"naive_companies_house_v1\"\n", - "\n", - "compiled = compile_query_to_postgresql(selector=my_selector, model=my_model)\n", - "print(compiled)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Total time: 0:02:12.507736\n" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": " cluster_sha1 \\\n0 b'\\x00O!\\x9bf\\x91\\xb0\\xfe\\xb9v]\\x0c\\xa3\\xb6l5\\... \n1 b'\\x00]\\x95\\x8a\\xbex\\x1bA\\xa6\\xa5\\xf9\\x88\\x17\\... \n2 b'\\x00af\\x91\\x8f\\x97xH\\xc3\\x9f\\xa6\\r\\x13\\xf1\\x... \n\n data_sha1 \\\n0 b'\\x00O!\\x9bf\\x91\\xb0\\xfe\\xb9v]\\x0c\\xa3\\xb6l5\\... \n1 b'\\x00]\\x95\\x8a\\xbex\\x1bA\\xa6\\xa5\\xf9\\x88\\x17\\... \n2 b'\\x00af\\x91\\x8f\\x97xH\\xc3\\x9f\\xa6\\r\\x13\\xf1\\x... \n\n companieshouse_companies_company_name \\\n0 ARCADE GEEKS INT LTD \n1 LOWELL GROUP SHARED SERVICES LIMITED \n2 KIMDOOLE LTD \n\n companieshouse_companies_company_number companieshouse_companies_postcode \n0 13231865 DY13 9RH \n1 08647094 LS15 8GH \n2 14445223 WC2H 9JQ ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cluster_sha1data_sha1companieshouse_companies_company_namecompanieshouse_companies_company_numbercompanieshouse_companies_postcode
0b'\\x00O!\\x9bf\\x91\\xb0\\xfe\\xb9v]\\x0c\\xa3\\xb6l5\\...b'\\x00O!\\x9bf\\x91\\xb0\\xfe\\xb9v]\\x0c\\xa3\\xb6l5\\...ARCADE GEEKS INT LTD13231865DY13 9RH
1b'\\x00]\\x95\\x8a\\xbex\\x1bA\\xa6\\xa5\\xf9\\x88\\x17\\...b'\\x00]\\x95\\x8a\\xbex\\x1bA\\xa6\\xa5\\xf9\\x88\\x17\\...LOWELL GROUP SHARED SERVICES LIMITED08647094LS15 8GH
2b'\\x00af\\x91\\x8f\\x97xH\\xc3\\x9f\\xa6\\r\\x13\\xf1\\x...b'\\x00af\\x91\\x8f\\x97xH\\xc3\\x9f\\xa6\\r\\x13\\xf1\\x...KIMDOOLE LTD14445223WC2H 9JQ
\n
" - }, - "metadata": {}, - "execution_count": 17 - } - ], - "source": [ - "start = time.time()\n", - "\n", - "df = cmf.query(\n", - " selector=my_selector, return_type=\"pandas\", model=my_model\n", - ")\n", - "\n", - "elapsed = time.time() - start\n", - "print(f\"Total time: {timedelta(seconds=elapsed)}\")\n", - "df.head(3)" - ] - }, - { - "source": [ - "# Scratch\n", - "\n", - "The below is me messing about. Here be dragons." - ], - "cell_type": "markdown", - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "CPU times: user 6 µs, sys: 0 ns, total: 6 µs\nWall time: 10.7 µs\n 1053456 function calls (1052831 primitive calls) in 324.603 seconds\n\n Ordered by: cumulative time\n\n ncalls tottime percall cumtime percall filename:lineno(function)\n 1 0.020 0.020 324.603 324.603 /home/theia/company-matching/cmf/helpers/selector.py:335(query)\n 1 321.725 321.725 321.725 321.725 {method 'copy_expert' of 'psycopg2.extensions.cursor' objects}\n 1 0.002 0.002 1.635 1.635 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:868(read_csv)\n 1 0.000 0.000 1.633 1.633 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:583(_read)\n 1 0.000 0.000 1.471 1.471 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1907(read)\n 1 1.096 1.096 1.273 1.273 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:222(read)\n 2 0.000 0.000 0.458 0.229 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/strings/accessor.py:248(__getitem__)\n 2 0.000 0.000 0.457 0.229 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/strings/base.py:37(_str_getitem)\n 2 0.000 0.000 0.457 0.229 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_arrow.py:440(_str_slice)\n 2 0.019 0.009 0.457 0.229 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/strings/object_array.py:304(_str_slice)\n 2 0.240 0.120 0.439 0.219 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_arrow.py:289(_str_map)\n 25 0.000 0.000 0.424 0.017 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:505(_execute_on_connection)\n 25 0.000 0.000 0.424 0.017 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1593(_execute_clauseelement)\n 24 0.000 0.000 0.423 0.018 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1377(execute)\n 25 0.000 0.000 0.421 0.017 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1794(_execute_context)\n 25 0.000 0.000 0.418 0.017 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1853(_exec_single_context)\n 25 0.000 0.000 0.416 0.017 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:923(do_execute)\n 25 0.415 0.017 0.416 0.017 {method 'execute' of 'psycopg2.extensions.cursor' objects}\n 2 0.000 0.000 0.316 0.158 /home/theia/company-matching/cmf/data/utils/db.py:82(string_to_table)\n 3 0.000 0.000 0.316 0.105 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/deprecations.py:249(warned)\n 2 0.000 0.000 0.316 0.158 :1(__new__)\n 2 0.000 0.000 0.316 0.158 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:424(__new__)\n 2 0.000 0.000 0.316 0.158 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:433(_new)\n 4 0.000 0.000 0.316 0.079 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:488(__init__)\n 2 0.000 0.000 0.316 0.158 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:881(_autoload)\n 2 0.000 0.000 0.310 0.155 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1468(reflect_table)\n 2 0.000 0.000 0.298 0.149 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1950(_get_reflection_info)\n 16 0.000 0.000 0.298 0.019 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1970(run)\n 1 0.000 0.000 0.287 0.287 /home/theia/company-matching/cmf/helpers/selector.py:299(_selector_to_pandas_dtypes)\n 9 0.000 0.000 0.268 0.030 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:659(__array__)\n 9 0.241 0.027 0.268 0.030 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:1356(to_numpy)\n 2 0.000 0.000 0.223 0.112 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:876(get_multi_columns)\n 2 0.000 0.000 0.223 0.112 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3631(get_multi_columns)\n 39/36 0.000 0.000 0.219 0.006 {built-in method numpy.asarray}\n 3 0.000 0.000 0.218 0.073 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:4780(apply)\n 3 0.000 0.000 0.218 0.073 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/apply.py:1409(apply)\n 3 0.000 0.000 0.218 0.073 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/apply.py:1482(apply_standard)\n 3 0.000 0.000 0.217 0.072 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:891(_map_values)\n 2 0.000 0.000 0.217 0.109 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:1415(map)\n 2 0.015 0.008 0.217 0.109 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/base.py:2299(map)\n 5 0.000 0.000 0.202 0.040 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:78(cache)\n 3 0.152 0.051 0.202 0.067 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/algorithms.py:1667(map_array)\n 14/13 0.000 0.000 0.199 0.015 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:389(__init__)\n 2 0.000 0.000 0.199 0.099 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:683(__init__)\n 2 0.000 0.000 0.198 0.099 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:423(dict_to_mgr)\n 21 0.000 0.000 0.197 0.009 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:517(sanitize_array)\n 1 0.000 0.000 0.197 0.197 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:607(_init_dict)\n 6 0.033 0.006 0.196 0.033 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1577(construct_1d_object_array_from_listlike)\n 2 0.000 0.000 0.196 0.098 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:763(_try_cast)\n 20 0.004 0.000 0.176 0.009 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_arrow.py:201(_from_sequence_of_strings)\n 20 0.165 0.008 0.172 0.009 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_arrow.py:174(_from_sequence)\n 5 0.000 0.000 0.163 0.033 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:974(__array__)\n 1 0.000 0.000 0.162 0.162 /home/theia/company-matching/cmf/helpers/selector.py:239(_selector_to_data)\n 1 0.000 0.000 0.161 0.161 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1575(__init__)\n 1 0.000 0.000 0.161 0.161 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1848(_make_engine)\n 1 0.161 0.161 0.161 0.161 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:60(__init__)\n 1015746 0.139 0.000 0.139 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/strings/object_array.py:306()\n 1 0.000 0.000 0.119 0.119 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:570(read_sql)\n 1 0.000 0.000 0.116 0.116 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:1779(read_query)\n 1 0.000 0.000 0.115 0.115 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:1655(execute)\n 2 0.000 0.000 0.115 0.057 :1(_load_domains)\n 2 0.000 0.000 0.114 0.057 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4878(_load_domains)\n 2 0.000 0.000 0.087 0.044 :1(_load_enums)\n 2 0.000 0.000 0.087 0.044 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4805(_load_enums)\n 14 0.000 0.000 0.064 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:317(apply)\n 2 0.000 0.000 0.062 0.031 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6460(astype)\n 5 0.000 0.000 0.062 0.012 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:723(astype)\n 5 0.000 0.000 0.062 0.012 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:191(astype_array_safe)\n 5 0.000 0.000 0.062 0.012 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:157(astype_array)\n 2 0.000 0.000 0.062 0.031 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:421(astype)\n 5 0.000 0.000 0.062 0.012 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:56(_astype_nansafe)\n 5 0.006 0.001 0.062 0.012 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:303(_from_sequence)\n 5 0.048 0.010 0.056 0.011 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:447(_box_pa_array)\n 4 0.000 0.000 0.049 0.012 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_arrow.py:256(astype)\n 2 0.000 0.000 0.049 0.025 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/base.py:656(astype)\n 4 0.000 0.000 0.049 0.012 {built-in method numpy.array}\n 8 0.000 0.000 0.030 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3984(_reflect_constraint)\n 3 0.000 0.000 0.023 0.008 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:544(__getitem__)\n 3 0.022 0.007 0.022 0.007 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/compute.py:248(wrapper)\n 2 0.000 0.000 0.022 0.011 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:964(get_multi_pk_constraint)\n 4 0.000 0.000 0.022 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4044()\n 3 0.000 0.000 0.017 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2245(execute)\n 3 0.000 0.000 0.017 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2078(_execute_internal)\n 3 0.000 0.000 0.017 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:283(orm_execute_statement)\n 2 0.000 0.000 0.016 0.008 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:2700(first)\n 2 0.000 0.000 0.016 0.008 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:2822(_iter)\n 7 0.015 0.002 0.015 0.002 {method 'copy' of 'numpy.ndarray' objects}\n 6 0.000 0.000 0.012 0.002 :1(_get_table_oids)\n 6 0.000 0.000 0.012 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:105(go)\n 2 0.000 0.000 0.012 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1410(get_multi_check_constraints)\n 2 0.000 0.000 0.012 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3877(_get_table_oids)\n 2 0.000 0.000 0.012 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4697(get_multi_check_constraints)\n 2 0.000 0.000 0.011 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1051(get_multi_foreign_keys)\n 2 0.000 0.000 0.011 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4156(get_multi_foreign_keys)\n 2 0.000 0.000 0.011 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1139(get_multi_indexes)\n 1 0.000 0.000 0.011 0.011 /home/theia/company-matching/cmf/helpers/selector.py:137(_parent_to_tree)\n 2 0.000 0.000 0.011 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4405(get_multi_indexes)\n 10 0.000 0.000 0.010 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:691(do_rollback)\n 10 0.010 0.001 0.010 0.001 {method 'rollback' of 'psycopg2.extensions.connection' objects}\n 1 0.000 0.000 0.010 0.010 /home/theia/company-matching/cmf/data/utils/db.py:112(string_to_dataset)\n 35 0.001 0.000 0.010 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_arrow.py:129(__init__)\n 8 0.000 0.000 0.009 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1221(close)\n 2 0.000 0.000 0.009 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:309(_compiler)\n 2 0.000 0.000 0.009 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1335(__init__)\n 2 0.000 0.000 0.009 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1319(get_multi_table_comment)\n 62 0.000 0.000 0.009 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6298(__setattr__)\n 2 0.000 0.000 0.009 0.005 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4622(get_multi_table_comment)\n 2 0.000 0.000 0.009 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:822(__init__)\n 3/2 0.000 0.000 0.009 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:909(process)\n 158/2 0.000 0.000 0.009 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:129(_compiler_dispatch)\n 9/2 0.000 0.000 0.009 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4657(visit_select)\n 2 0.000 0.000 0.009 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1227(get_multi_unique_constraints)\n 2 0.000 0.000 0.009 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4545(get_multi_unique_constraints)\n 5 0.000 0.000 0.009 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2577(close)\n 5 0.000 0.000 0.009 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2726(_do_close)\n 5 0.000 0.000 0.009 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2710(_close_impl)\n 5 0.000 0.000 0.009 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2704(_connection_rollback_impl)\n 5 0.000 0.000 0.009 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1116(_rollback_impl)\n 1 0.000 0.000 0.008 0.008 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:243(compile)\n 4 0.000 0.000 0.008 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4259(__setitem__)\n 4 0.000 0.000 0.008 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4502(_set_item)\n 94 0.001 0.000 0.008 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1619(_reflect_column)\n 22 0.008 0.000 0.008 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/compute.py:338(cast)\n 4 0.000 0.000 0.008 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:5229(_sanitize_column)\n 4 0.000 0.000 0.008 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:12662(_reindex_for_setitem)\n 9/2 0.000 0.000 0.007 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4926(_compose_select_body)\n 9/2 0.000 0.000 0.007 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4976()\n 13/1 0.000 0.000 0.007 0.007 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5263(visit_join)\n 12/2 0.000 0.000 0.007 0.003 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3998(visit_cte)\n 141 0.001 0.000 0.006 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1320(_set_parent_with_dispatch)\n 55/51 0.000 0.000 0.005 0.000 {built-in method builtins.next}\n 24/22 0.000 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:123(__exit__)\n 4 0.000 0.000 0.005 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:330(_inspection_context)\n 42 0.000 0.000 0.005 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:309(_operation_context)\n 53/38 0.000 0.000 0.005 0.000 {method 'join' of 'str' objects}\n 14/11 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:660(create_for_statement)\n 2/1 0.000 0.000 0.004 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2999(visit_compound_select)\n 6/3 0.000 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3037()\n 94 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1208(append_column)\n 8 0.000 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1000(create_for_statement)\n 2 0.000 0.000 0.004 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:237(__exit__)\n 110 0.001 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1505(__init__)\n 2 0.000 0.000 0.004 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1811(_reflect_indexes)\n 74/72 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1137(__get__)\n 26 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5163(__init__)\n 208/202 0.001 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:314(expect)\n 10 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:850(c)\n 6 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:2073(_populate_column_collection)\n 25 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1276(_init_compiled)\n 6 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6245(_generate_fromclause_column_proxies)\n 28 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4182(__init__)\n 5 0.002 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:834(isna)\n 8 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1384(_checkin)\n 25 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:667(_compile_w_cache)\n 8 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:917(_finalize_fairy)\n 37 0.002 0.000 0.002 0.000 {built-in method numpy.empty}\n 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:1643(__exit__)\n 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:538(close)\n 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:489(__exit__)\n 16 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2594(_make_proxy)\n 1 0.000 0.000 0.002 0.002 /home/theia/company-matching/cmf/helpers/selector.py:200(_reachable_to_parent_data_stmt)\n 5 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1507(close)\n 9 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4768()\n 5 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1750(__exit__)\n 5 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2423(close)\n 25 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4432(_label_select_column)\n 5 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2525(_close_impl)\n 8 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1179(_setup_for_generate)\n 5 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state_changes.py:95(_go)\n 1 0.000 0.000 0.002 0.002 /home/theia/company-matching/cmf/helpers/selector.py:163(_tree_to_reachable_stmt)\n 8 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1398(_reset)\n 94 0.001 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2291(_set_parent)\n 36 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:552(__get__)\n 1 0.000 0.000 0.002 0.002 /home/theia/company-matching/cmf/helpers/selector.py:107(get_all_children)\n 1 0.000 0.000 0.002 0.002 /home/theia/company-matching/cmf/data/models.py:69(child_neighbours)\n 2/1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1063(get)\n 22 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1505(operate)\n 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1108(_fire_loader_callables)\n 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py:867(_load_for_state)\n 2 0.000 0.000 0.002 0.001 :1(close)\n 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py:994(_emit_lazyload)\n 2 0.000 0.000 0.002 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1346(close)\n 22 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:186(operate)\n 2 0.001 0.000 0.002 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3664(_get_columns_info)\n 170 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:526(iterrows)\n 54/19 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:575(__eq__)\n5146/5119 0.001 0.000 0.002 0.000 {built-in method builtins.isinstance}\n 6 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1755(_join)\n 36/20 0.000 0.000 0.002 0.000 {built-in method _operator.eq}\n 22 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/default_comparator.py:51(_boolean_compare)\n 13 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1850(_join_left_to_right)\n 10 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:450(operate)\n 24 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1806(_setup_result_proxy)\n 10 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/properties.py:475(operate)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6889(convert_dtypes)\n 9 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4486(__init__)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:449(convert_dtypes)\n 75 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1596(pandas_dtype)\n 32 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6281(__getattr__)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:669(convert_dtypes)\n 17 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2032(_process_parameters_for_postcompile)\n 2 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4378(_generate_fromclause_column_proxies)\n 167/155 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1249(__get__)\n 26 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4219(_check_attach)\n 4 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6284()\n 11 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:369(__eq__)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:194(_wrap_result)\n 15 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2427(_on_table_attach)\n 24 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1419(__init__)\n 15 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/api.py:41(listen)\n 27 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:549(find)\n 63/61 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1298(__getattr__)\n 112 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:219(_init_items)\n 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6258()\n 56 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2583(visit_column)\n 16 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:2214(_generate_columns_plus_names)\n 52 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:437(expect_col_expression_collection)\n 167 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:437(__get__)\n 52 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5229(_set_parent)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:160(_convert_arrays_to_dataframe)\n 9 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4592(_get_froms)\n 94 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2201(_set_type)\n 15 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:280(listen)\n 18 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3368(visit_binary)\n 21 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2525(visit_label)\n1209/1200 0.000 0.000 0.001 0.000 {built-in method builtins.getattr}\n 1108 0.001 0.000 0.001 0.000 {method 'get' of 'dict' objects}\n 11 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4614(_get_item_cache)\n 42 0.001 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:289(__init__)\n 9 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3209(_set_parent)\n 24 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:273(_generative)\n 17 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4616(_normalize_froms)\n 140 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:587(_validate_dialect_kwargs)\n 61 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:857(dialect_impl)\n 8 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4050(__getitem__)\n 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/strings/accessor.py:255(_wrap_result)\n 18 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3459(_generate_generic_binary)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:694()\n 3 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:985(convert_dtypes)\n 4 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1264(oneshot)\n 167 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:183(_for_instance)\n 35 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:524(_post_coercion)\n 27 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1852(construct_params)\n 4 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:411(_generate_cache_key)\n 96 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2020(replace)\n 4 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:347(_generate_cache_key)\n 4 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:465(__getattr__)\n 20 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1379()\n 8 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2526(to_compile_state)\n 7 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:3971(_ixs)\n 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:96(arrays_to_mgr)\n 5 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:475(__new__)\n 15 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:177(_listen)\n 107 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1576(__iter__)\n 23/4 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:221(_gen_cache_key)\n 13 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/util.py:1802(__init__)\n 131 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/inspection.py:113(inspect)\n 22 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1313(oneshot)\n 170 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:2119(_fetchiter_impl)\n 134/77 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1126(__get__)\n 15 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:362(_listen)\n 120 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1958(_append_new_column)\n 25 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:526(get)\n 15 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:333(base_listen)\n 212 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7496(quote)\n 16 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:380(__clause_element__)\n 13 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1949(_join_determine_implicit_left_side)\n 518/510 0.000 0.000 0.001 0.000 {built-in method builtins.hasattr}\n 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:327(_memoized_attr_expression)\n 8 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:3256(connect)\n 18 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:969(_dialect_info)\n 167 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:180(_for_class)\n 3 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:553(orm_setup_cursor_result)\n 35 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:7593(ensure_index)\n 8 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:131(__init__)\n 17 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2902(_for_columns)\n 114 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/array.py:369(process)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:1009(_set_parent)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6659(copy)\n 268 0.000 0.000 0.000 0.000 {method 'update' of 'dict' objects}\n 42 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:203(sub)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/properties.py:434(_memoized_method___clause_element__)\n 94 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2416(_setup_on_memoized_fks)\n 56 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4297(_set_parent)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3289(_literal_execute_expanding_parameter)\n 879/740 0.000 0.000 0.000 0.000 {built-in method builtins.len}\n 170 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1097(fetchone)\n 35 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:492(_deep_deannotate)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/util.py:1202(_adapt_element)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:236(_from_objects)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:1955(filter_by)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:78(instances)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1375(_is_dtype)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:355(_concatenate_chunks)\n 199 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5140(__new__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4469(_set_item_mgr)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/numeric.py:274(full)\n 13 0.000 0.000 0.000 0.000 :1(join)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:203(find_left_clause_to_join_from)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:536(is_string_dtype)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5437(_can_hold_identifiers_and_holds_name)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1178(__init__)\n 475 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:374(__call__)\n 22 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2830(_construct_for_op)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:343(_compiler_dispatch)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5350(safe_construct)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:1331(traverse)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1948(__init__)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:5127(reindex)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:359(__missing__)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/missing.py:101(isna)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:1359(_locate_col)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:1983()\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/missing.py:184(_isna)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:5340(reindex)\n 230 0.000 0.000 0.000 0.000 {method 'sub' of 're.Pattern' objects}\n 35 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:499(clone)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:487()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:1141(replace)\n 102/96 0.000 0.000 0.000 0.000 {built-in method builtins.all}\n 94 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1240()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:596(_homogenize)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1157(maybe_infer_to_datetimelike)\n 35 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7442(_requires_quotes)\n 167 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:137(__init__)\n 327 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:159(__getattr__)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1687(_populate_separate_keys)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6233(__finalize__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:1117(_corresponding_column)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1719(create_cursor)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_selectable_constructors.py:441(select)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:3281(raw_connection)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:139(_parse_date_columns)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:326(corresponding_column)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5269(join)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4458(_iset_item_mgr)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2614()\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1399(_get_dtype)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:441(connect)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_.py:186(construct_array_type)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2039(_connection_for_bind)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5124(__init__)\n 22 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3850(__init__)\n 8/7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:335()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:2037(has_table)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1022(adapt)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:1484(items)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1255(_checkout)\n 188 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4363(__contains__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4517(_bind_param)\n 187 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:152(_deannotate)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:885(pandasSQL_builder)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2769(__init__)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/api.py:28(_event_key)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:557(copy)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4494(_tq_label)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:145(_get_option)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:400(has_table)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:113()\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4596(_box_col_values)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:589(append)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1066(iset)\n 8/2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:838(in_)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:343(__missing__)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6235(_all_selected_columns)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4894(_gen_tq_label)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5131()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2689(row_processor)\n 6/2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2099(in_op)\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:849(__call__)\n 1 0.000 0.000 0.000 0.000 :1(has_table)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1861(from_array)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1777(first)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4656()\n 292 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1621(__contains__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:741(_only_one_row)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:358(append_to_list)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1691(_reflect_pk)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:228(_construct)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6429(dtypes)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1740(create_default_cursor)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2767(_generate_delimited_and_list)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:392(ensure_dtype_objs)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:707(checkout)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:806(_set_axis)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:5764(isna)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:407()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:301(_engine_insp)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:8690(isna)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3295(has_table)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2760(_generate_delimited_list)\n 94 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/abc.py:117(__instancecheck__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/_orm_constructors.py:2200(aliased)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/default_comparator.py:212(_in_impl)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:811(_instance_processor)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1287(scalar)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4963(_reload)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/util.py:1030(_alias_factory)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:520(_execute_on_scalar)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1403(constructor_copy)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:808(exported_columns)\n 108 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4275(_col_expressions)\n 148 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/encodings/utf_8.py:15(decode)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:245(_init_engine)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:332(for_modify)\n 111 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4814(__init__)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2761()\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1482(_init_metadata)\n 256 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:415(__getitem__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/util.py:714(__init__)\n 3 0.000 0.000 0.000 0.000 :1(_connection_for_bind)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4923(_set_parent)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2763()\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:120(_stored_in_collection)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:751(checkin)\n 37 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:278(__init__)\n 170 0.000 0.000 0.000 0.000 {method 'fetchone' of 'psycopg2.extensions.cursor' objects}\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2533(expunge_all)\n 49 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2093(__init__)\n 415 0.000 0.000 0.000 0.000 {built-in method __new__ of type object at 0x56274e32b380}\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:790(_literal_coercion)\n 191 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1169(key)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2796(visit_expression_clauselist)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1012(iget)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1483(cursor)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2119(create_block_manager_from_column_arrays)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/concat.py:52(concat_compat)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:827(_iter_impl)\n 49 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5279(__new__)\n 52 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1927(add)\n 89 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:289(_compile)\n 72 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:464(__eq__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1107(_connection_for_bind)\n 107 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1578()\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:137(is_object_dtype)\n 64 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4555(go)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5323(__contains__)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4647()\n 178 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:238(construct_from_string)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3648(visit_bindparam)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/naming.py:191(_constraint_name)\n 137 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/generic.py:42(_instancecheck)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:207(chunks)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1512(_close_special)\n 51 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_.py:118(__init__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1600(_construct)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1434(_is_dtype_type)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2121(_join_check_and_adapt_right_side)\n 40 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1519(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4402()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1670(_fetchone_impl)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1633(_populate_column_collection)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2253(_fetchone_impl)\n 4/1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:622(convert)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:3028(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:801(_generate_fromclause_column_proxies)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:451(_return_conn)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:335(_accept_with)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1692()\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:537(__init__)\n 23/21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:114(__enter__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:124(maybe_convert_platform)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:127(_get_single_key)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/impl.py:144(_do_return_conn)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:503(extract_first_column_annotation)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:452(split_and_operate)\n 34/15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:423(get_cls_kwargs)\n 26 0.000 0.000 0.000 0.000 {method 'cursor' of 'psycopg2.extensions.connection' objects}\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:574(_ad_hoc_cache_key_from_args)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2188(_form_blocks)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1445(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/compat/_optional.py:85(import_optional_dependency)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4874(_setup_select_stack)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/ipkernel.py:770(_clean_thread_parent_frames)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:339(_from_mgr)\n 54 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_collections_abc.py:767(__contains__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/missing.py:261(_isna_array)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2620()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1277(is_extension_array_dtype)\n 130 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/row.py:156(_mapping)\n 94 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3671(_handle_array_type)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_.py:140(construct_from_string)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:664(_constructor_from_mgr)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1529(_soft_close)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:132(put)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/util.py:790(__getattr__)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:552(_kw_reg_for_dialect_cls)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1586(_simple_statement)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:5595()\n 706 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}\n 94 0.000 0.000 0.000 0.000 {built-in method _abc._abc_instancecheck}\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:672(_constructor_sliced_from_mgr)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2716(new_block)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1612(_init)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:299(generate)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1895()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/accessor.py:220(__get__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2876(query)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2645(maybe_coerce_values)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3171(_resolve_column)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:925(traverse)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1834(_unwrapped_dialect_impl)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:1515(_concat_same_type)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:235(__init__)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:986(_gen_dialect_impl)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:124(_annotate)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:369()\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:2131(_fetchall_impl)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/impl.py:153(_do_get)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:780(name)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/strings/accessor.py:188(__init__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:804()\n 110 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:2233(_extra_kwargs)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:1006(copy)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1072(_literal_coercion)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2076(__iter__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:281(_set_entities)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/_utils.py:23(to_numpy_dtype_inference)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:1006(convert_object_array)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2712(__init__)\n 54 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3899(_truncated_identifier)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:951(__call__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:269(_as_annotated_instance)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:1070()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:836(_literal_coercion)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:543(_allrows)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:1620(__init__)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:790(copy)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:261(helper)\n 19 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/inference.py:273(is_dict_like)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:242()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:1028(convert)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:459(_detach_states)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexers/utils.py:419(check_array_indexer)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1129(fetchall)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1265(_iset_single)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:510(_validate_dtype)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5651(identical)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:2498(cte)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/naming.py:152(_constraint_name_for_table)\n 86 0.000 0.000 0.000 0.000 {method 'match' of 're.Pattern' objects}\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1198(is_bool_dtype)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1250(__iter__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1081(is_numeric_dtype)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:188(match)\n 62 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:115(__eq__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4403()\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:580(type_descriptor)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:873(traverse_using)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2808(self_group)\n 137 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/generic.py:37(_check)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1518()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:1305(__init__)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:502(_iterator_getter)\n 2 0.000 0.000 0.000 0.000 {built-in method numpy.zeros}\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:273(__call__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:684(get_multi_table_options)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py:830(_generate_lazy_clause)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:537(_raw_all_rows)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:174(get)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:205(_effective_processors)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:3189(setup_compile_state)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:246(_select_iterables)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1593(__getitem__)\n 138 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:2289(to_instance)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1995(extend)\n 148 0.000 0.000 0.000 0.000 {built-in method _codecs.utf_8_decode}\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:827(_values)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:274(make_block)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:2580(limit)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1531(__clause_element__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:347(__init__)\n 94 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1642()\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:572(condition)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:564(dialect_options)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:322(_expand_cloned)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3573(_get_state_attr_by_column)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1352(all_selected_columns)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:1404(_offset_or_limit_clause)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2226(_handle_dbapi_exception)\n 198 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:173(_get_table_key)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1967(_populate_separate_keys)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:463(orm_pre_session_exec)\n 29 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5131(construct)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:157(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:287()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2703(new_block_2d)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:2301(adapt_type)\n 33/15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1550(_from_objects)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:633(is_integer_dtype)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:703(dtype)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2228(construct_from_string)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:882(safe_merge)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:7688(maybe_extract_name)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:2050(_init)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1491()\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/pandas_compat.py:660(get_datetimetz_type)\n 90 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:774(__hash__)\n 108 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4286()\n 122 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:271(inner)\n 268 0.000 0.000 0.000 0.000 {method 'startswith' of 'str' objects}\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5215(visit_table)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:980(_anonymous_fromclause)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:3105(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6328(union)\n 228 0.000 0.000 0.000 0.000 {method 'search' of 're.Pattern' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:553(_statement_20)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:669(_sliced_from_mgr)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:1052(create_row_processor)\n 383 0.000 0.000 0.000 0.000 {built-in method builtins.setattr}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:726(alias)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1804(corresponding_column)\n 1 0.000 0.000 0.000 0.000 :1(limit)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:86(__init__)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:1305(construct_from_string)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:786(__add__)\n 47 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7457(quote_schema)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4212(visit_alias)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/util.py:954(__init__)\n 94 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:2052(quoted_token_parser)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4312(_create_union)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7567(format_label)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:377(__getitem__)\n 147 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1364(__init__)\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_arrow.py:164(__len__)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1022(_literal_coercion)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:664(get_handle)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:842(_engine)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:568(require_length_match)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:223()\n 61 0.000 0.000 0.000 0.000 {method 'update' of 'set' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3021(_construct)\n 399 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:1375(cast)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4706(_make_proxy)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4297(__init__)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:693(_sanitize_ndim)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2558(is_precedent)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:932(_init_collections)\n 232 0.000 0.000 0.000 0.000 {method 'pop' of 'list' objects}\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5213(__init__)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1571(validate_all_hashable)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:1050(_instance)\n 35 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4843(_clone)\n 311 0.000 0.000 0.000 0.000 {method 'add' of 'set' objects}\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:730(name)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5552(equals)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2941(visit_function)\n 112 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1740(set_creation_order)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:1054(construct_from_string)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/util/version/__init__.py:339(__init__)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py:567(post_exec)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:236(set_axis)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4772(all_selected_columns)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:831(construct_from_string)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:973(_gen_cache_key_inst)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1223(_set_memoized_attribute)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:666(_info_axis)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:635(_get_root)\n 54 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1438(self_group)\n 58 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/__init__.py:34(using_copy_on_write)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:737(_generate)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/importlib/__init__.py:109(import_module)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1991(dtype)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3977(limit)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1544(_hide_froms)\n 31 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2674(get_block_type)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2978(_process_clauses_for_boolean)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py:636(__init__)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2002(internal_values)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1492(__getattr__)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:330()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1814(_autobegin_t)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2454(is_boolean)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:180(blknos)\n 4 0.000 0.000 0.000 0.000 :1(where)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:6450(any)\n 1 0.000 0.000 0.000 0.000 :1018(_gcd_import)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5372(__add__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:496(_merge_cursor_description)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/util/_decorators.py:325(wrapper)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3893(_offset_or_limit_clause)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4303()\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:296(_annotate)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_elements_constructors.py:185(and_)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:2028(to_dict)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:529(is_string_or_object_np_dtype)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1778(_init_proxy_index)\n 1 0.000 0.000 0.000 0.000 :1002(_find_and_load)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2728()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/base.py:82(shape)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3064(_row_limit_clause)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1541(_initial_populate)\n 2 0.000 0.000 0.000 0.000 :1(filter)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3923(bindparam_string)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3124(and_)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/ipkernel.py:785()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:880(__init__)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1547()\n 2/1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:584(__ne__)\n 6 0.000 0.000 0.000 0.000 {method 'fetchall' of 'psycopg2.extensions.cursor' objects}\n 42 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1602(executemany)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:1883(limit_clause)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:469(__hash__)\n 35 0.000 0.000 0.000 0.000 :398(parent)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:234(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/_psycopg_common.py:120(__init__)\n 32 0.000 0.000 0.000 0.000 {method 'issuperset' of 'frozenset' objects}\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5373(__getitem__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2332(_soft_close)\n 77 0.000 0.000 0.000 0.000 {method 'difference' of 'set' objects}\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/pg_catalog.py:50(process)\n 27 0.000 0.000 0.000 0.000 {built-in method builtins.sorted}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:577()\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/decl_api.py:1867(_inspect_decl_meta)\n 17 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:481(ensure_wrapped_if_datetimelike)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:2313(is_unique)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:464(_cloned_set)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:254()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:2103(union)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2172(process_expanding)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1761(all)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2837(visit_cast)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:339(dispatch_is)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:994(_static_cache_key)\n 156 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1606(_select_statement)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:798(begin)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1038(_default_multi_reflect)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/strings/accessor.py:207(_validate)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1772(as_readonly)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:689(get_plugin_class)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:6409(_reduce)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2134(_gen_cache_key)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3053(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:165(__setitem__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2996(function_argspec)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:147(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:1528()\n 55 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_arrow.py:71(_chk_pyarrow_available)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:676(_translate_key)\n 271 0.000 0.000 0.000 0.000 {built-in method builtins.callable}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/util.py:609(__init__)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:458(get_children)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1362(all)\n 193 0.000 0.000 0.000 0.000 {built-in method builtins.hash}\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4545(_column_naming_convention)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3019()\n 37 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/flags.py:51(__init__)\n 57 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/range.py:999(__len__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:286(__init__)\n 1 0.000 0.000 0.000 0.000 {built-in method _operator.ne}\n 35 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:226(is_string)\n 1 0.000 0.000 0.000 0.000 {method 'mogrify' of 'psycopg2.extensions.cursor' objects}\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2210(_safe_close_cursor)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1638(_soft_close)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4903(__init__)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:870(_unwrapped_dialect_impl)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4358(_set_parent)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:198(search)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4630()\n 36 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3362()\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:994(_get_context_loader)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:165(simplefilter)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5348(__init__)\n 97 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1215(_reset_memoizations)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5228(_with_annotations)\n 48 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/inference.py:334(is_hashable)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1566(_expanded_proxy_set)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:2012(set_committed_value)\n 118 0.000 0.000 0.000 0.000 {method 'endswith' of 'str' objects}\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2431(is_comparison)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4702(_from_objects)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:870(_post_coercion)\n 52 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:816(iterate)\n 1 0.000 0.000 0.000 0.000 :1(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2427(visit_grouping)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/base.py:86(_validate_set_axis)\n 191 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3886(_truncate_bindparam)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1693(label)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:287(get_dtypes)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:262(__init__)\n 58 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/inference.py:300()\n 49 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:455(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1776(_bind_processors)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5940(where)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/flags.py:87(allows_duplicate_labels)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2158(_entity_namespace_key)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:515(_inspect_mapped_class)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2679(__init__)\n 12 0.000 0.000 0.000 0.000 :1033(_handle_fromlist)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:3071(setup_compile_state)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1757(get_result_processor)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:144(__hash__)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3775(_resolve_value_to_type)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4606()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2811(ensure_block_shape)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:3777(get_loc)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2171(name)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:342(construct_from_string)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/nanops.py:482(nanany)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4600()\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1270(is_1d_only_ea_dtype)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:649(_simple_new)\n 58 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:389(__bool__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2104(__repr__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1424(_next)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6308(self_group)\n 108 0.000 0.000 0.000 0.000 {method 'group' of 're.Match' objects}\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4593(__init__)\n 76 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3077(_apply_item_processor)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1784()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5031(_render_cte_clause)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:442(_row_getter)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3051(_set_parent_with_dispatch)\n 71 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:181(__init__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1036(coerce_compared_value)\n 19 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3070(_get_operator_dispatch)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2787(visit_clauselist)\n 115 0.000 0.000 0.000 0.000 {built-in method builtins.iter}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/extras.py:669()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:866(_instantiate_types)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:1835(construct_from_string)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4308(__init__)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py:570(_log_notices)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3358()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:794(_autobegin)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:491(__call__)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/types.py:171(__get__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:586()\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:416(extract_array)\n 148 0.000 0.000 0.000 0.000 {method 'values' of 'dict' objects}\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2233(_soft_close)\n 82 0.000 0.000 0.000 0.000 {built-in method builtins.issubclass}\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:256(__enter__)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/naming.py:142(_get_convention)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/base.py:84()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2117(_clone)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2095(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2215(construct_array_type)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:301(_with_annotations)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:830(__add__)\n 55 0.000 0.000 0.000 0.000 {method 'copy' of 'dict' objects}\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/log.py:101(_should_log_debug)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1786()\n 1 0.000 0.000 0.000 0.000 {method 'any' of 'numpy.ndarray' objects}\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:909(__len__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:173()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2365(shape)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:1462(_set_as_cached)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:638(_extract_index)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1848(from_blocks)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/util.py:1114(_memoized_method___clause_element__)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1109(ident)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1685(_clean_options)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6303(_needs_parens_for_grouping)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:418(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:916(_cached_result_processor)\n 56 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:436(__getitem__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:583(copy_func)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4573(_ensure_valid_index)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1835(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:115(__init__)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2795(extend_blocks)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5398(apply_map)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:2958(__init__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1388(enumerate)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5111(_create_raw_select)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1780(_consolidate_inplace)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:187(_join)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:388(_inspect_func_args)\n 32 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1590()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1675(_fetchall_impl)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/typing.py:310(is_non_string_iterable)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:910(__len__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1029(_take_snapshot)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:181(_add_filter)\n 40 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:236(is_large_string)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4391(_add_to_result_map)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1186(mappings)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:836(__iter__)\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:844(__init__)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7572(format_alias)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:752(_maybe_repeat)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:455(__contains__)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1695()\n 111 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_arrow.py:207(dtype)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_methods.py:55(_any)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2266(_fetchall_impl)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py:2111(__eq__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:436(_split)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/range.py:137(__new__)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5154(_memoized_method_lower)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/apply.py:1377(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2701(_connection_begin_impl)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2139(_entity_namespace)\n 1 0.000 0.000 0.000 0.000 :1(select_from)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/exc.py:604(instance)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:257(_adjust_fn_spec)\n 63 0.000 0.000 0.000 0.000 {method 'replace' of 'str' objects}\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2341(_get_extra_criteria)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3931(_from_objects)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:191(_validate_parse_dates_presence)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:304(_get_filepath_or_buffer)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2175(_grouping_func)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:215(to_pyarrow_type)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:443(_column_naming_convention)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:394(adapt_to_entity)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexing.py:2765(check_dict_or_set_indexers)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2310(_select_args)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/uuid.py:138(__init__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/range.py:201(_simple_new)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:1010(view)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:301(maybe_iterable_to_list)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4149()\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:351(notify)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1762(is_consolidated)\n 17 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1598(_proxy_key)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:267(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1314(fetchall)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:502(new_instance)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/re.py:250(compile)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/missing.py:466(array_equivalent)\n 39 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:649(_get_deprecated_option)\n 1 0.000 0.000 0.000 0.000 {method 'reduce' of 'numpy.ufunc' objects}\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3055(_resolve_col_tokens)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:4399(_check_setitem_copy)\n 93 0.000 0.000 0.000 0.000 {built-in method from_iterable}\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4479(_tq_key_label)\n 1 0.000 0.000 0.000 0.000 :156(__enter__)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:3317(_render_bindtemplate)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1477(comparator)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_elements_constructors.py:1427(or_)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1677(cast)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:819(get_connection)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2357(_adjust_for_extra_criteria)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:786(_getitem)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5170(_get_engine_target)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/types.py:171(__init__)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:123()\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:901(_post_coercion)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:106(remove)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7671(format_label_name)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:481()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3144(or_)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:516(run_generated_dispatch)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:417(to_list)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:458(__enter__)\n 64 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:139()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:900(_cached_bind_processor)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1770(_consolidate_check)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:516(_inc_counter)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:617(_select_options)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:1837(_initialize_collection)\n 43 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1707(_get_current_adapter)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:43(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2651(visit_typeclause)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:1969(process)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4220()\n 2 0.000 0.000 0.000 0.000 :166(_get_module_lock)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/properties.py:468(_fallback_getattr)\n 19 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:184(is_duration)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py:835(is_disconnect)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:1272(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3391(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1094(_begin_impl)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:121(classes)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2985(_autoflush)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/typing_extensions.py:582(__instancecheck__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:2303(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/missing.py:305(_isna_string_dtype)\n 94 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/_json.py:159(typecast_json)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:913(coerce_compared_value)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:949(process)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:259(__exit__)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:732(_sanitize_str_dtypes)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2217()\n 46 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:366(__hash__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2781(_from_objects)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:609(_dtype_to_subclass)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:1865(filter)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:214(is_extension)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4222()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:550(__setitem__)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:718(dtype)\n 6 0.000 0.000 0.000 0.000 {method 'astype' of 'numpy.ndarray' objects}\n 60 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:354(_listen_fn)\n 50 0.000 0.000 0.000 0.000 {method 'intersection' of 'set' objects}\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:368(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/missing.py:564(_array_equivalent_object)\n 22 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1295(_fallback_getattr)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:1165(_is_binary_mode)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/range.py:483(_view)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1579(__get__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:606(_from_objects)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/compute.py:215(_handle_options)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:213(_init_global_attributes)\n 49 0.000 0.000 0.000 0.000 {method 'get' of 'mappingproxy' objects}\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/pg_catalog.py:53()\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/base.py:341(opt_manager_of_class)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1615(__getattr__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3886(_order_by_clause)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:830(_hasna)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_asarray.py:27(require)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1990(__exit__)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:827(__init__)\n 2 0.000 0.000 0.000 0.000 {method 'view' of 'numpy.ndarray' objects}\n 29 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1411()\n 22 0.000 0.000 0.000 0.000 {method 'discard' of 'set' objects}\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:166(__setattr__)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_.py:109(na_value)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:426(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/base.py:74(__len__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1627(_get_options_with_defaults)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1010(_implicit_coercions)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:1204(is_potential_multi_index)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_internal.py:920(npy_ctypes_check)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/log.py:104(_should_log_info)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1487(__getattr__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4662(element)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:348(__new__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:182(_make_key_to_index)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:1967(items)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1689(isEnabledFor)\n 25 0.000 0.000 0.000 0.000 {method 'close' of 'psycopg2.extensions.cursor' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:339(__add__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:1709(render_bind_cast)\n 63 0.000 0.000 0.000 0.000 {method 'items' of 'dict' objects}\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_dtype.py:346(_name_get)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2763(_select_iterable)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5021(_generate_prefixes)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:252(_key)\n 53 0.000 0.000 0.000 0.000 {method 'lower' of 'str' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:252(create_for_statement)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/numerictypes.py:357(issubdtype)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1622(close)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:340()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:375(entity_namespace)\n 35 0.000 0.000 0.000 0.000 {method 'rpartition' of 'str' objects}\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/inference.py:195(is_array_like)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:225(_full)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:231(_get)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1938(_block)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5106(group_by_clause)\n 57 0.000 0.000 0.000 0.000 {built-in method builtins.id}\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:80(_memoized_attr_ref)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:535(_still_open_and_dbapi_connection_is_valid)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:907(from_execution_options)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1993(__init__)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/config.py:688(_warn_if_deprecated)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1436(adapt)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:201(_set_noconvert_columns)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/exc.py:692(__init__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:131()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:3095(_link_to_col_by_colstring)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:973(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:4611(_clear_item_cache)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:228(_put)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1552(proxy_set)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5676()\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/registry.py:256(with_wrapper)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:5995(select_from)\n 5 0.000 0.000 0.000 0.000 {method 'reshape' of 'numpy.ndarray' objects}\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4266(flush)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:283(__new__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:259(all_states)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3719(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1776()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:720(visit_has_cache_key_list)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1064(soft_close)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:659(_constructor_from_mgr)\n 28 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:340(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:370()\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:372(apply_if_callable)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:561(_manage_size)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/apply.py:121(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:475(initialize_collection)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:437(__init__)\n 50 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:147()\n 16 0.000 0.000 0.000 0.000 {method '__enter__' of '_thread.RLock' objects}\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:798(_post_coercion)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:2133(_refine_defaults_read)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:585(_get_axis)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4063(__init__)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:412(_gen_cache_key)\n 54 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:404(flags)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2743(_construct_raw)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_typing.py:353(is_quoted_name)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4253()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:808(__len__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:956()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/api.py:386(default_index)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:428(__setitem__)\n 1 0.000 0.000 0.000 0.000 {built-in method _functools.reduce}\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1331(is_ea_or_datetimelike_dtype)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3054()\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3969(_has_row_limiting_clause)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:367()\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:152(cast_scalar_indexer)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/util/version/__init__.py:520(_cmpkey)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/extras.py:640(getquoted)\n 2 0.000 0.000 0.000 0.000 /home/theia/company-matching/cmf/data/utils/db.py:20(get_schema_table_names)\n 33 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1072(_effective_plugin_target)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2783()\n 49 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3059(_from_objects)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1697()\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4653()\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:484()\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2152()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3110(_construct_raw)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:711(_get_plugin_class_for_plugin)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4672(_get_display_froms)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1137(scalars)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2244(_stack_arrays)\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2888(selectable)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/__init__.py:55(using_pyarrow_string_dtype)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:1872(__init__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3294(__init__)\n 12 0.000 0.000 0.000 0.000 {method 'union' of 'set' objects}\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:732()\n 1 0.000 0.000 0.000 0.000 :1(unique)\n 1 0.000 0.000 0.000 0.000 :203(_lock_unlock_module)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:515(_has_column_expression)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:6031()\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:179(is_timestamp)\n 45 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1544(_select_iterable)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:438(enter_context)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:389(standardize_mapping)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:894(entity)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:1253(iget)\n 22 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/operators.py:2473(is_associative)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5117(order_by_clause)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5510(__contains__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:340(_red)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:541()\n 41 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4830(get_children)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4034(get_multi_pk_constraint)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:394(__init__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4933()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2135(__hash__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:5515(_add_table)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/range.py:553(equals)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2166()\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/_config/__init__.py:42(warn_copy_on_write)\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1267(memo)\n 1 0.000 0.000 0.000 0.000 {built-in method builtins.locals}\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/flags.py:55(allows_duplicate_labels)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:193(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3572(coerce_compared_value)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_collections_abc.py:825(__iter__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:2068()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:305(_connection_insp)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:362(attrs)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/abc.py:121(__subclasscheck__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1122()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:930(_commit)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/multiarray.py:1080(copyto)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3168(self_group)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/array.py:262(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:936(__init__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:1631(__len__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:92(_gen_annotations_cache_key)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:787(name)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:1939(_strict_as_bool)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/numerictypes.py:283(issubclass_)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:156(_adjust_fn_spec)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:348(_constructor)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:591(_ensure_array)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:686()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/exc.py:477(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2766()\n 22 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:2162(_get_reference_cte)\n 37 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:217(selectable)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:831(_reset_identity)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:194(close)\n 3 0.000 0.000 0.000 0.000 /home/theia/company-matching/cmf/helpers/selector.py:330()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:4308(_is_clean)\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:442(__setitem__)\n 1 0.000 0.000 0.000 0.000 /home/theia/company-matching/cmf/data/utils/db.py:165(sqa_profiled)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1143(_reset)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py:1152(create_row_processor)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1820(load_dialect_impl)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4774()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:643(_getitem)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:353(__init__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4144(_set_parent)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:219(_can_consolidate)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1699()\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1225(__init__)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3760(__init__)\n 44 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:885(mapper)\n 34 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:1672(_from_objects)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:466(_type_memos)\n 19 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:660(_constructor)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:405(_clone)\n 35 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:200(_copy_internals)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:861(_references)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:362(_make_index)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:369(_key)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:1249(shape)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:589(_has_bind_expression)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:1914(_set_table)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/warnings.py:477(__exit__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/concat.py:73()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:928(_select_iterable)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_collections_abc.py:760(get)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:248(stringify_path)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_typing.py:349(has_schema_attr)\n 30 0.000 0.000 0.000 0.000 {method 'append' of 'collections.deque' objects}\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4856(_from_objects)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:706()\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:862(__contains__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:1233(dedup_names)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:209(is_large_binary)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3733(adapt_emulated_to_native)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:884(keys)\n 50 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/base.py:613(ndim)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1674(_check_file_or_buffer)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3649()\n 35 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:197(_clone)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:713(warn)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:868(array)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:3836(set_label_style)\n 2 0.000 0.000 0.000 0.000 :87(acquire)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:194(_state_session)\n 20 0.000 0.000 0.000 0.000 {method '__exit__' of '_thread.RLock' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:601(_set_noconvert_dtype_columns)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:197(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:999()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:342(_resolve_for_literal)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:786(adapt_to_entity)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:48(_kill)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_collections_abc.py:775(keys)\n 2 0.000 0.000 0.000 0.000 :58(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:929(__getattr__)\n 4 0.000 0.000 0.000 0.000 {method 'remove' of 'list' objects}\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:126(_classes_and_not_datetimelike)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:1308(_populate_full)\n 28 0.000 0.000 0.000 0.000 {method 'keys' of 'dict' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1634(_make_proxy)\n 5 0.000 0.000 0.000 0.000 {built-in method _weakref._remove_dead_weakref}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:524(dialect_kwargs)\n 32 0.000 0.000 0.000 0.000 {method 'pop' of 'set' objects}\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:551()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:871(_do_date_conversions)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:456(_engine_type)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1667(_fetchiter_impl)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/construction.py:685(_sanitize_non_ordered)\n 30 0.000 0.000 0.000 0.000 {method 'popleft' of 'collections.deque' objects}\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:179(__len__)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:93()\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/inspect.py:73(isclass)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:428(has_intersection)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1188(oneshot)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1746(pre_exec)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:570(connection)\n 40 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/range.py:376(dtype)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:185()\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:246(is_mapped)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:47(is_null)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7191(visit_string)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7289(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:550(infer_compression)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2692(_with_polymorphic_mappers)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/inference.py:105(is_file_like)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1111(get_multi_table_options)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:170(get)\n 23 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1316(memo)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2372(iget)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4508(_non_anon_label)\n 27 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1280(_post_coercion)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1706(__init__)\n 2 0.000 0.000 0.000 0.000 {built-in method _abc._abc_subclasscheck}\n 28 0.000 0.000 0.000 0.000 {built-in method builtins.any}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:465(_push_cm_exit)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:452(_constructor)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:189(is_time)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/collections.py:488(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3640()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2689(_deactivate_from_connection)\n 12 0.000 0.000 0.000 0.000 {method 'strip' of 'str' objects}\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:3472(_prepare_filter_names)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:3143(entity_namespace)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5201(__get__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:106(keys)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:2668(_get_entity_clauses)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4766()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:276()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_asarray.py:108()\n 2 0.000 0.000 0.000 0.000 :112(release)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1390(embedded)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:219(get)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:338(session)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:146(__new__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:183(maybe_box_native)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:182(__init__)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4810(_is_star)\n 16 0.000 0.000 0.000 0.000 {method '_is_owned' of '_thread.RLock' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:288()\n 3 0.000 0.000 0.000 0.000 {method 'throw' of 'generator' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:676(lint)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/util.py:1076(__init__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1987()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_concurrency_py3k.py:57(is_exit_exception)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4864(_render_label_in_columns_clause)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1604()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:996(_literal_coercion)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:131(coerce_to_immutabledict)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:410(coerce_generator_arg)\n 1 0.000 0.000 0.000 0.000 :160(__exit__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/util/version/__init__.py:348()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2632(get_bind)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:1548(for_context)\n 25 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/series.py:1471(_clear_item_cache)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:571(_get_axis_number)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/string_.py:136(type)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:350(_maybe_make_multi_index_columns)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:507(_cleanup)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:1898(get_select_precolumns)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:222(_empty)\n 2 0.000 0.000 0.000 0.000 :185(cb)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:323(_deannotate)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:536(dict)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/arrays/arrow/array.py:794(dtype)\n 7 0.000 0.000 0.000 0.000 {method 'clear' of 'dict' objects}\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:252(_init_connection)\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:5144(_values)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:459()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/uuid.py:279(__str__)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:175(kind)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:540()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:642()\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:363(ndim)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:279(_extract_multi_indexer_columns)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:2006(array_values)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1644(get)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:354(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:770(_type_affinity)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:79(_is_literal)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/util/_validators.py:450(check_dtype_backend)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:220(_resolve_for_literal)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:183(method_is_overridden)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:421(_supports_2d)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/path_registry.py:673(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:737(self_group)\n 3 0.000 0.000 0.000 0.000 :1(_generated_cache_key_traversal)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/loading.py:183()\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:1155()\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:652(visit_string_list)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:196(blklocs)\n 14 0.000 0.000 0.000 0.000 {method 'find' of 'str' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4026(__init__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:395(_set_propagate_attrs)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:631(self_group)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1546(_all_columns)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:1040(needs_i8_conversion)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1715(_reflect_fk)\n 9 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:246(is_date)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1877(_reflect_unique_constraints)\n 16 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:588(_hide_froms)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:207(validate_header_arg)\n 1 0.000 0.000 0.000 0.000 {method 'take' of 'numpy.ndarray' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:207(_add_unpresent)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:632(visit_with_context_options)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1445(is_valid)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:209(is_object)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:458(__repr__)\n 14 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:344()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/exc.py:48(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:930()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/util.py:1014()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:223(__len__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1939(_reflect_table_comment)\n 12 0.000 0.000 0.000 0.000 {method '__exit__' of '_thread.lock' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4132(table)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:289()\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:878(_state_dict)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:214(schema_for_object)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:948(from_blocks)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:2380(_check_configure)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/interfaces.py:1689(get_table_options)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:515(get_compression_method)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1576(_init_cte_state)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:134(__getitem__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1570(__bool__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1703(parse_user_argument_for_enum)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2019(params)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4000()\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:6672(_maybe_cast_indexer)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1146(reset)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1989()\n 12 0.000 0.000 0.000 0.000 {built-in method _warnings._filters_mutated}\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:743(__init__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1996()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:426(_no_statement_condition)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:865(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/uuid.py:267(__hash__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:159(_insert_item)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:219(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:1002(_extra_kwargs)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:485(__str__)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:147(__class_getitem__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1036(unique)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:407(__iter__)\n 4 0.000 0.000 0.000 0.000 {method 'insert' of 'list' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:430(_bind_typing_render_casts)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/util.py:105(_trans_ctx_check)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:112(check_modified)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5287(apply_map)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:807(adapter)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:179(__clause_element__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:1229()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/construction.py:196(mgr_to_mgr)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1925(_reflect_check_constraints)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7125(visit_VARCHAR)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:906(process)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:1117(_maybe_memory_map)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7452(_requires_quotes_illegal_chars)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4408()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:974(dtype)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:408(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/copy.py:66(copy)\n 1 0.000 0.000 0.000 0.000 {built-in method _codecs.lookup}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/common.py:91(ensure_python_int)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:1671(name)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:97()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3066()\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:92()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/selectable.py:4429(_all_selected_columns)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:568()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:747(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/_collections_abc.py:802(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_dtype.py:24(_kind_name)\n 2 0.000 0.000 0.000 0.000 :1()\n 2 0.000 0.000 0.000 0.000 {built-in method sys.exc_info}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:188(all_none)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/numpy/core/_dtype.py:330(_name_includes_bit_suffix)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/compat/_optional.py:74(get_version)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1010(_iterate_self_and_parents)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:1458(_is_native_for_emulated)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1718(unique)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:246(items)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1358(asint)\n 2 0.000 0.000 0.000 0.000 {method 'union' of 'frozenset' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:893(_check_data_length)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1191(memo)\n 2 0.000 0.000 0.000 0.000 {method 'encode' of 'str' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:1269(_process_date_conversion)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/base.py:791(is_)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1850()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:1701()\n 2 0.000 0.000 0.000 0.000 :1(set)\n 3 0.000 0.000 0.000 0.000 {method 'bit_length' of 'int' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:438(_no_limit_offset)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:1913(_filter_by_zero)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:3887(__bool__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:408()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:421(clauses)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:1123(_make_date_converter)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:2056(_clean_na_values)\n 8 0.000 0.000 0.000 0.000 {method 'extend' of 'list' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:403()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:229(__iter__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:852(_unique_strategy)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pyarrow/types.py:119(is_floating)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/util/_validators.py:226(validate_bool_kwarg)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/visitors.py:700(visitor_iterator)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1041(in_transaction)\n 3 0.000 0.000 0.000 0.000 {method 'difference_update' of 'set' objects}\n 1 0.000 0.000 0.000 0.000 :2(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5022()\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:1826(ndim)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/util/version/__init__.py:470(_parse_letter_version)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:131(close)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/util.py:1097(entity)\n 4 0.000 0.000 0.000 0.000 {built-in method sys.getrefcount}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:366(__init__)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1747(__enter__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:757(_generate)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/util/version/__init__.py:149(__lt__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/query.py:231(_propagate_attrs)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:688(do_begin)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:436()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4893()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:4422()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2085()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:213(__new__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3580(bind_processor)\n 1 0.000 0.000 0.000 0.000 :948(_sanity_check)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/mapper.py:872(_gen_cache_key)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4132()\n 2 0.000 0.000 0.000 0.000 {built-in method time.perf_counter}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:1563()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:982(type)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1152(_post_coercion)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/instrumentation.py:493(get_impl)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:885(bind_processor)\n 1 0.000 0.000 0.000 0.000 {method 'decode' of 'bytes' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:398(_create_exit_wrapper)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:348(_is_boolean)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/base.py:337(_is_numeric)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:1548()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:946(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:4635(_render_label_in_columns_clause)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:395(visit_clauseelement)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:1426(_validate_parse_dates_arg)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/missing.py:1073(clean_reindex_fill_method)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:2064(table_comment)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3045(_set_parent)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:4388(get_render_as_alias_suffix)\n 1 0.000 0.000 0.000 0.000 :152(__init__)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/context.py:185()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/psycopg2/extras.py:633(__init__)\n 1 0.000 0.000 0.000 0.000 {method 'seek' of '_io.StringIO' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:980(_is_transaction_boundary)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:544(__len__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:212()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2204(__init__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/base.py:159(_freeze)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:913(__init__)\n 1 0.000 0.000 0.000 0.000 {built-in method sys.getfilesystemencoding}\n 1 0.000 0.000 0.000 0.000 :1(_generated_get_children_traversal)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:391(_from_objects)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:5238(type)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/functions.py:925(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:255(_has_complex_date_col)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:470(_push_exit_callback)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5067()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:330()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/interfaces.py:1442(create_row_processor)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:527(validate_integer)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:891(process)\n 2 0.000 0.000 0.000 0.000 {method 'count' of 'list' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:640()\n 4 0.000 0.000 0.000 0.000 {built-in method _imp.acquire_lock}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:686(__init__)\n 4 0.000 0.000 0.000 0.000 {built-in method _thread.allocate_lock}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:434(_ensure_has_table_connection)\n 2 0.000 0.000 0.000 0.000 {method 'difference' of 'frozenset' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:912(__str__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:213()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:1663(_attributes)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:7109(_render_string_type)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2087()\n 1 0.000 0.000 0.000 0.000 {method 'acquire' of '_thread.lock' objects}\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:703(_resolve_for_literal)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:388()\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/util/version/__init__.py:534()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/annotation.py:106()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1372()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:2365(_validate_skipfooter)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:2052(unique_constraints)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:1447(is_index_col)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/util.py:1515()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/schema.py:4211()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:2267(_extract_dialect)\n 1 0.000 0.000 0.000 0.000 {built-in method _codecs.lookup_error}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py:372(_entity_namespace)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:3568(native)\n 1 0.000 0.000 0.000 0.000 {method 'intersection' of 'frozenset' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:543(closed)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5070()\n 4 0.000 0.000 0.000 0.000 {built-in method _imp.release_lock}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:235()\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:243(result_processor)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:792(value)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:188(_expand_user)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:2239(array_values)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:2136()\n 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:2044(foreign_keys)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/base.py:323(_consolidate_inplace)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/reflection.py:2056(check_constraints)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py:326()\n 4 0.000 0.000 0.000 0.000 {built-in method _thread.get_ident}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/compat/numpy/function.py:64(__call__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/base.py:977(_gen_cache_key)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:444(mapper)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:94(_process_parse_dates_argument)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:994(hard_close)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/base.py:2639(visit_UUID)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:221()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:234(__enter__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/frame.py:655(_constructor)\n 1 0.000 0.000 0.000 0.000 {method 'partition' of 'str' objects}\n 3 0.000 0.000 0.000 0.000 {method 'isascii' of 'str' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/common.py:292(is_fsspec_url)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/util/version/__init__.py:508(_parse_local_version)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/common.py:192()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1373()\n 1 0.000 0.000 0.000 0.000 /home/theia/company-matching/cmf/helpers/selector.py:160()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2745(visit_null)\n 1 0.000 0.000 0.000 0.000 {method 'upper' of 'str' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:986(_validate_usecols_arg)\n 1 0.000 0.000 0.000 0.000 {method 'setdefault' of 'dict' objects}\n 1 0.000 0.000 0.000 0.000 {method 'pop' of 'collections.deque' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:2971()\n 1 0.000 0.000 0.000 0.000 {built-in method builtins.ord}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:549(invalidated)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:436(_pending_mutations)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/indexes/range.py:216(_validate_dtype)\n 1 0.000 0.000 0.000 0.000 {method 'reverse' of 'list' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:559(_validate_names)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:560()\n 1 0.000 0.000 0.000 0.000 {built-in method builtins.globals}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:5084(get_cte_preamble)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:562()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1786(handle_dbapi_exception)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py:247()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1572(_global_attributes)\n 1 0.000 0.000 0.000 0.000 {method 'release' of '_thread.lock' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/traversals.py:398(visit_clauseelement_list)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/sql.py:1448(__enter__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1987(__enter__)\n 1 0.000 0.000 0.000 0.000 {method 'with_traceback' of 'BaseException' objects}\n 1 0.000 0.000 0.000 0.000 /home/theia/company-matching/cmf/data/models.py:70()\n\n\n\n" - } - ], - "source": [ - "%time\n", - "\n", - "# SAMPLE = 10_000\n", - "_SOURCE_L = \"naive_data_hub_v1\"\n", - "\n", - "dh_selector = selector(\n", - " table=\"dbt.data_hub__companies\",\n", - " fields=[\"name\", \"company_number\", \"address_postcode\"],\n", - ")\n", - "\n", - "with sqa_profiled():\n", - " dh_raw = cmf.query(\n", - " selector=dh_selector, return_type=\"pandas\", model=_SOURCE_L#, limit=SAMPLE\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": " cluster_sha1 \\\n0 b\"B\\xae\\\\4\\x83\\xe8\\xad#\\x91z'\\xa3\\x0e\\xbb#\\xb1... \n1 b'K\\xfa\\xb4\\xb9\\xac\\xbe\\x8e\\x8c\\xdd\\x12\\x0e\\xd... \n2 b'\\\\\\xe1WW\\x97\\x06$\\x9eV=12h\\xce\\x7f:\\xaa\\x9a\\... \n\n data_sha1 \\\n0 b\"B\\xae\\\\4\\x83\\xe8\\xad#\\x91z'\\xa3\\x0e\\xbb#\\xb1... \n1 b'K\\xfa\\xb4\\xb9\\xac\\xbe\\x8e\\x8c\\xdd\\x12\\x0e\\xd... \n2 b'\\\\\\xe1WW\\x97\\x06$\\x9eV=12h\\xce\\x7f:\\xaa\\x9a\\... \n\n dbt_data_hub__companies_name \\\n0 National Star Centre For Disabled Youth Ltd \n1 HAWKESBURY CONSULTING LIMITED \n2 BIRMINGHAM WOMENS AND CHILDRENS NHS FOUNDATION... \n\n dbt_data_hub__companies_company_number \\\n0 \n1 06736356 \n2 \n\n dbt_data_hub__companies_address_postcode \n0 GL53 9QU \n1 CB24 4UQ \n2 B4 6NH ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cluster_sha1data_sha1dbt_data_hub__companies_namedbt_data_hub__companies_company_numberdbt_data_hub__companies_address_postcode
0b\"B\\xae\\\\4\\x83\\xe8\\xad#\\x91z'\\xa3\\x0e\\xbb#\\xb1...b\"B\\xae\\\\4\\x83\\xe8\\xad#\\x91z'\\xa3\\x0e\\xbb#\\xb1...National Star Centre For Disabled Youth Ltd<NA>GL53 9QU
1b'K\\xfa\\xb4\\xb9\\xac\\xbe\\x8e\\x8c\\xdd\\x12\\x0e\\xd...b'K\\xfa\\xb4\\xb9\\xac\\xbe\\x8e\\x8c\\xdd\\x12\\x0e\\xd...HAWKESBURY CONSULTING LIMITED06736356CB24 4UQ
2b'\\\\\\xe1WW\\x97\\x06$\\x9eV=12h\\xce\\x7f:\\xaa\\x9a\\...b'\\\\\\xe1WW\\x97\\x06$\\x9eV=12h\\xce\\x7f:\\xaa\\x9a\\...BIRMINGHAM WOMENS AND CHILDRENS NHS FOUNDATION...<NA>B4 6NH
\n
" - }, - "metadata": {}, - "execution_count": 47 - } - ], - "source": [ - "dh_raw.head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "CPU times: user 2 µs, sys: 0 ns, total: 2 µs\nWall time: 4.77 µs\n" - } - ], - "source": [ - "%time\n", - "\n", - "dh_raw = cmf.query(\n", - " selector=dh_selector, return_type=\"pandas\", model=_SOURCE_L#, limit=SAMPLE\n", - ")" - ] - }, - { - "source": [ - "import time\n", - "from datetime import timedelta\n", - "\n", - "start = time.time()\n", - "\n", - "ew_selector = selector(\n", - " table=\"dbt.export_wins__wins_dataset\",\n", - " fields=[\"company_name\", \"cdms_reference\"],\n", - ")\n", - "\n", - "ew_raw = cmf.query(\n", - " selector=ew_selector, return_type=\"pandas\", model=\"naive_export_wins_v1\"\n", - ")\n", - "\n", - "elapsed = time.time() - start\n", - "print(f\"Total time: {timedelta(seconds=elapsed)}\")\n", - "ew_raw.head(3)" - ], - "cell_type": "code", - "metadata": { - "tags": [] - }, - "execution_count": 49, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Total time: 0:00:29.497772\n" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": " cluster_sha1 \\\n0 b'\\x02\\xd3\\xc3\\xfb\\x01KnG\\xc9,\\x07\\xb1\\xc1\\x11... \n1 b'\\x04\\xdfY\\xad\\xadtT\\x1b\\xed\\xfd\\x06w\\xe9J\\xf... \n2 b'\\x06\\xc1S\\xb5p\\x88SZ\\xbcV\\xd0a\\xfbT\\xad\\xd3g... \n\n data_sha1 \\\n0 b'\\x02\\xd3\\xc3\\xfb\\x01KnG\\xc9,\\x07\\xb1\\xc1\\x11... \n1 b'&\\x04\\x9a\\xda~v\\xbeu?F\\xf0\\xfd\\x92\\xa7IP\\xfa... \n2 b'\\x8cV\\xb8[\\xac\\xa6K,]\\xb1\\x96\\xbf\\xfe\\x1a\\x9... \n\n dbt_export_wins__wins_dataset_company_name \\\n0 ETA Green Power Limited \n1 Med-Eq (Europe) Ltd \n2 Silver Lined Horizons Ltd \n\n dbt_export_wins__wins_dataset_cdms_reference \n0 Companies House ref: 12359858 \n1 ORG-10109781 \n2 ORG-10170829 ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cluster_sha1data_sha1dbt_export_wins__wins_dataset_company_namedbt_export_wins__wins_dataset_cdms_reference
0b'\\x02\\xd3\\xc3\\xfb\\x01KnG\\xc9,\\x07\\xb1\\xc1\\x11...b'\\x02\\xd3\\xc3\\xfb\\x01KnG\\xc9,\\x07\\xb1\\xc1\\x11...ETA Green Power LimitedCompanies House ref: 12359858
1b'\\x04\\xdfY\\xad\\xadtT\\x1b\\xed\\xfd\\x06w\\xe9J\\xf...b'&\\x04\\x9a\\xda~v\\xbeu?F\\xf0\\xfd\\x92\\xa7IP\\xfa...Med-Eq (Europe) LtdORG-10109781
2b'\\x06\\xc1S\\xb5p\\x88SZ\\xbcV\\xd0a\\xfbT\\xad\\xd3g...b'\\x8cV\\xb8[\\xac\\xa6K,]\\xb1\\x96\\xbf\\xfe\\x1a\\x9...Silver Lined Horizons LtdORG-10170829
\n
" - }, - "metadata": {}, - "execution_count": 49 - } - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Total time: 0:01:25.730407\n" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": " cluster_sha1 \\\n0 b\"B\\xae\\\\4\\x83\\xe8\\xad#\\x91z'\\xa3\\x0e\\xbb#\\xb1... \n1 b'K\\xfa\\xb4\\xb9\\xac\\xbe\\x8e\\x8c\\xdd\\x12\\x0e\\xd... \n2 b'\\\\\\xe1WW\\x97\\x06$\\x9eV=12h\\xce\\x7f:\\xaa\\x9a\\... \n\n data_sha1 \\\n0 b\"B\\xae\\\\4\\x83\\xe8\\xad#\\x91z'\\xa3\\x0e\\xbb#\\xb1... \n1 b'K\\xfa\\xb4\\xb9\\xac\\xbe\\x8e\\x8c\\xdd\\x12\\x0e\\xd... \n2 b'\\\\\\xe1WW\\x97\\x06$\\x9eV=12h\\xce\\x7f:\\xaa\\x9a\\... \n\n dbt_data_hub__companies_name \\\n0 National Star Centre For Disabled Youth Ltd \n1 HAWKESBURY CONSULTING LIMITED \n2 BIRMINGHAM WOMENS AND CHILDRENS NHS FOUNDATION... \n\n dbt_data_hub__companies_company_number \\\n0 \n1 06736356 \n2 \n\n dbt_data_hub__companies_address_postcode \n0 GL53 9QU \n1 CB24 4UQ \n2 B4 6NH ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cluster_sha1data_sha1dbt_data_hub__companies_namedbt_data_hub__companies_company_numberdbt_data_hub__companies_address_postcode
0b\"B\\xae\\\\4\\x83\\xe8\\xad#\\x91z'\\xa3\\x0e\\xbb#\\xb1...b\"B\\xae\\\\4\\x83\\xe8\\xad#\\x91z'\\xa3\\x0e\\xbb#\\xb1...National Star Centre For Disabled Youth Ltd<NA>GL53 9QU
1b'K\\xfa\\xb4\\xb9\\xac\\xbe\\x8e\\x8c\\xdd\\x12\\x0e\\xd...b'K\\xfa\\xb4\\xb9\\xac\\xbe\\x8e\\x8c\\xdd\\x12\\x0e\\xd...HAWKESBURY CONSULTING LIMITED06736356CB24 4UQ
2b'\\\\\\xe1WW\\x97\\x06$\\x9eV=12h\\xce\\x7f:\\xaa\\x9a\\...b'\\\\\\xe1WW\\x97\\x06$\\x9eV=12h\\xce\\x7f:\\xaa\\x9a\\...BIRMINGHAM WOMENS AND CHILDRENS NHS FOUNDATION...<NA>B4 6NH
\n
" - }, - "metadata": {}, - "execution_count": 50 - } - ], - "source": [ - "import time\n", - "from datetime import timedelta\n", - "\n", - "start = time.time()\n", - "\n", - "dh_selector = selector(\n", - " table=\"dbt.data_hub__companies\",\n", - " fields=[\"name\", \"company_number\", \"address_postcode\"],\n", - ")\n", - "\n", - "dh_raw = cmf.query(\n", - " selector=dh_selector, return_type=\"pandas\", model=\"naive_data_hub_v1\"\n", - ")\n", - "\n", - "elapsed = time.time() - start\n", - "print(f\"Total time: {timedelta(seconds=elapsed)}\")\n", - "dh_raw.head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Total time: 0:02:23.852911\n" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": " cluster_sha1 \\\n0 b'\\x00O!\\x9bf\\x91\\xb0\\xfe\\xb9v]\\x0c\\xa3\\xb6l5\\... \n1 b'\\x00]\\x95\\x8a\\xbex\\x1bA\\xa6\\xa5\\xf9\\x88\\x17\\... \n2 b'\\x00af\\x91\\x8f\\x97xH\\xc3\\x9f\\xa6\\r\\x13\\xf1\\x... \n\n data_sha1 \\\n0 b'\\x00O!\\x9bf\\x91\\xb0\\xfe\\xb9v]\\x0c\\xa3\\xb6l5\\... \n1 b'\\x00]\\x95\\x8a\\xbex\\x1bA\\xa6\\xa5\\xf9\\x88\\x17\\... \n2 b'\\x00af\\x91\\x8f\\x97xH\\xc3\\x9f\\xa6\\r\\x13\\xf1\\x... \n\n companieshouse_companies_company_name \\\n0 ARCADE GEEKS INT LTD \n1 LOWELL GROUP SHARED SERVICES LIMITED \n2 KIMDOOLE LTD \n\n companieshouse_companies_company_number companieshouse_companies_postcode \n0 13231865 DY13 9RH \n1 08647094 LS15 8GH \n2 14445223 WC2H 9JQ ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cluster_sha1data_sha1companieshouse_companies_company_namecompanieshouse_companies_company_numbercompanieshouse_companies_postcode
0b'\\x00O!\\x9bf\\x91\\xb0\\xfe\\xb9v]\\x0c\\xa3\\xb6l5\\...b'\\x00O!\\x9bf\\x91\\xb0\\xfe\\xb9v]\\x0c\\xa3\\xb6l5\\...ARCADE GEEKS INT LTD13231865DY13 9RH
1b'\\x00]\\x95\\x8a\\xbex\\x1bA\\xa6\\xa5\\xf9\\x88\\x17\\...b'\\x00]\\x95\\x8a\\xbex\\x1bA\\xa6\\xa5\\xf9\\x88\\x17\\...LOWELL GROUP SHARED SERVICES LIMITED08647094LS15 8GH
2b'\\x00af\\x91\\x8f\\x97xH\\xc3\\x9f\\xa6\\r\\x13\\xf1\\x...b'\\x00af\\x91\\x8f\\x97xH\\xc3\\x9f\\xa6\\r\\x13\\xf1\\x...KIMDOOLE LTD14445223WC2H 9JQ
\n
" - }, - "metadata": {}, - "execution_count": 51 - } - ], - "source": [ - "import time\n", - "from datetime import timedelta\n", - "\n", - "start = time.time()\n", - "\n", - "ch_selector = selector(\n", - " table=\"companieshouse.companies\",\n", - " fields=[\"company_name\", \"company_number\", \"postcode\"],\n", - ")\n", - "\n", - "ch_raw = cmf.query(\n", - " selector=ch_selector, return_type=\"pandas\", model=\"naive_companies_house_v1\"\n", - ")\n", - "\n", - "elapsed = time.time() - start\n", - "print(f\"Total time: {timedelta(seconds=elapsed)}\")\n", - "ch_raw.head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ch_raw.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "CPU times: user 2 µs, sys: 0 ns, total: 2 µs\nWall time: 4.29 µs\n" - } - ], - "source": [ - "%time\n", - "\n", - "SAMPLE = 10_000\n", - "_SOURCE_L = \"naive_export_wins_v1\"\n", - "\n", - "ew_selector = selector(\n", - " table=\"dbt.export_wins__wins_dataset\",\n", - " fields=[\"company_name\", \"cdms_reference\"],\n", - ")\n", - "\n", - "# with sqa_profiled():\n", - "ew_raw = cmf.query(\n", - " selector=ew_selector, return_type=\"sqlalchemy\", model=_SOURCE_L, limit=SAMPLE\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "sqlalchemy.engine.result.ChunkedIteratorResult" - }, - "metadata": {}, - "execution_count": 12 - } - ], - "source": [ - "type(ew_raw)" - ] - }, - { - "source": [ - "# Export wins\n", - "\n", - "Takes ages here, but runs VERY fast in PG directly." - ], - "cell_type": "markdown", - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "model = \"naive_export_wins_v1\"\n", - "ew_selector = selector(\n", - " table=\"dbt.export_wins__wins_dataset\",\n", - " fields=[\"company_name\", \"cdms_reference\"],\n", - ")\n", - "\n", - "# We want raw data with clusters attached\n", - "parent, child = _parent_to_tree(model, engine=ENGINE)\n", - "if len(parent) == 0:\n", - " raise ValueError(f\"Model {model} not found\")\n", - "tree = [parent] + child\n", - "reachable_stmt = _tree_to_reachable_stmt(tree)\n", - "lookup_stmt = _reachable_to_parent_data_stmt(reachable_stmt, parent)\n", - "data_stmt = _selector_to_data(ew_selector, engine=ENGINE).cte()\n", - "\n", - "final_stmt = select(lookup_stmt.c.parent.label(\"cluster_sha1\"), data_stmt).join(\n", - " lookup_stmt, lookup_stmt.c.child == data_stmt.c.data_sha1\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "with ENGINE.connect() as conn:\n", - " cursor = conn.connection.cursor()\n", - " compiled = final_stmt.compile(\n", - " dialect=postgresql.dialect(),\n", - " compile_kwargs={\"render_postcompile\": True}\n", - " )\n", - " compiled_bound = cursor.mogrify(str(compiled), compiled.params)\n", - " sql = parse_one(compiled_bound.decode(\"utf-8\"))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "{'dbt_export_wins__wins_dataset_company_name': 'string[pyarrow]',\n 'dbt_export_wins__wins_dataset_cdms_reference': 'string[pyarrow]'}" - }, - "metadata": {}, - "execution_count": 3 - } - ], - "source": [ - "from cmf.data.utils import get_schema_table_names, string_to_dataset, string_to_table\n", - "from sqlalchemy import LABEL_STYLE_TABLENAME_PLUS_COL\n", - "from sqlalchemy.orm import Session\n", - "\n", - "def selector_to_datatypes(selector, engine):\n", - " types_dict = {}\n", - " for schema_table, fields in selector.items():\n", - " db_schema, db_table = get_schema_table_names(schema_table)\n", - " db_table = string_to_table(db_schema, db_table, engine=engine)\n", - " stmt = (\n", - " select(db_table.c[tuple(fields)])\n", - " .limit(1)\n", - " .set_label_style(LABEL_STYLE_TABLENAME_PLUS_COL)\n", - " )\n", - " with Session(engine) as session:\n", - " res = pd.read_sql(stmt, session.bind).convert_dtypes(\n", - " dtype_backend=\"pyarrow\"\n", - " )\n", - " types_dict = types_dict | res.dtypes.apply(lambda x: x.name).to_dict()\n", - " \n", - " return types_dict\n", - "\n", - "selector_to_datatypes(ew_selector, ENGINE)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": " data_sha1 \\\n0 b'O\\xa7\\xd1k\\x0f\\xed\\xb6R\\xe4X-w\\x01ag\\xaam\\xa... \n\n dbt_export_wins__wins_dataset_company_name \\\n0 Veolia Nuclear Solutions \n\n dbt_export_wins__wins_dataset_cdms_reference \n0 ORG-10039882 ", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
data_sha1dbt_export_wins__wins_dataset_company_namedbt_export_wins__wins_dataset_cdms_reference
0b'O\\xa7\\xd1k\\x0f\\xed\\xb6R\\xe4X-w\\x01ag\\xaam\\xa...Veolia Nuclear SolutionsORG-10039882
\n
" - }, - "metadata": {}, - "execution_count": 14 - } - ], - "source": [ - "from sqlalchemy.orm import Session\n", - "\n", - "with Session(ENGINE) as session:\n", - " res = pd.read_sql(\n", - " _selector_to_data(ew_selector, engine=ENGINE).limit(1),\n", - " session.bind\n", - " ).convert_dtypes(\n", - " dtype_backend=\"pyarrow\"\n", - " )\n", - "\n", - "res" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "CPU times: user 1 µs, sys: 0 ns, total: 1 µs\nWall time: 5.01 µs\n\nRangeIndex: 57658 entries, 0 to 57657\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 cluster_sha1 57658 non-null string[pyarrow]\n 1 data_sha1 57658 non-null string[pyarrow]\n 2 dbt_export_wins__wins_dataset_company_name 57658 non-null string[pyarrow]\n 3 dbt_export_wins__wins_dataset_cdms_reference 57571 non-null string[pyarrow]\ndtypes: string[pyarrow](4)\nmemory usage: 7.1 MB\n" - } - ], - "source": [ - "%time\n", - "\n", - "import io\n", - "\n", - "selector_dtypes = selector_to_datatypes(ew_selector, ENGINE)\n", - "default_dtypes = {\n", - " \"cluster_sha1\": \"string[pyarrow]\",\n", - " \"data_sha1\": \"string[pyarrow]\"\n", - "}\n", - "\n", - "with ENGINE.connect() as conn:\n", - " cursor = conn.connection.cursor()\n", - " compiled = final_stmt.compile(\n", - " dialect=postgresql.dialect(),\n", - " compile_kwargs={\"render_postcompile\": True}\n", - " )\n", - " compiled_bound = cursor.mogrify(str(compiled), compiled.params)\n", - " sql = compiled_bound.decode(\"utf-8\")\n", - " copy_sql = f\"copy ({sql}) to stdout with csv header\"\n", - "\n", - " store = io.StringIO()\n", - " cursor.copy_expert(copy_sql, store)\n", - " store.seek(0)\n", - " \n", - " # res = pd.read_csv(store, dtype=default_dtypes | selector_dtypes)\n", - " res = pd.read_csv(\n", - " store, dtype=default_dtypes | selector_dtypes, engine=\"pyarrow\").convert_dtypes(dtype_backend=\"pyarrow\")\n", - "\n", - " # if \"data_sha1\" in res.columns:\n", - " # res.data_sha1 = res.data_sha1.str[2:].apply(bytes.fromhex)\n", - " # res.data_sha1 = res.data_sha1.astype(\"binary[pyarrow]\")\n", - " # if \"cluster_sha1\" in res.columns:\n", - " # res.cluster_sha1 = res.cluster_sha1.str[2:].apply(bytes.fromhex)\n", - " # res.cluster_sha1 = res.cluster_sha1.astype(\"binary[pyarrow]\")\n", - " \n", - "res.head(3)\n", - "res.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "20" - }, - "metadata": {}, - "execution_count": 38 - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "b'\\x02\\xd3\\xc3\\xfb\\x01KnG\\xc9,\\x07\\xb1\\xc1\\x11N\\xd7\\xb6\\xcb\\x1bq'" - }, - "metadata": {}, - "execution_count": 36 - } - ], - "source": [ - "bytes.fromhex(df.cluster_sha1[0][2:])" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "b'\\\\x02d3c3fb014b6e47c92c07b1c1114ed7b6cb1b71'" - }, - "metadata": {}, - "execution_count": 23 - } - ], - "source": [ - "x = df.cluster_sha1[0]\n", - "bytes(x.encode(\"utf-8\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "b'\\\\x02d3c3fb014b6e47c92c07b1c1114ed7b6cb1b71'" - }, - "metadata": {}, - "execution_count": 30 - } - ], - "source": [ - "x.encode(\"utf-8\")" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "20" - }, - "metadata": {}, - "execution_count": 29 - } - ], - "source": [ - "import hashlib\n", - "len(hashlib.sha1().digest())" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "CPU times: user 4 µs, sys: 0 ns, total: 4 µs\nWall time: 10.5 µs\n" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": "pyarrow.Table\ncluster_sha1: large_binary\ndata_sha1: large_binary\ndbt_export_wins__wins_dataset_company_name: string\ndbt_export_wins__wins_dataset_cdms_reference: string\n----\ncluster_sha1: [[02D3C3FB014B6E47C92C07B1C1114ED7B6CB1B71,04DF59ADAD74541BEDFD0677E94AF4097B808FD0,06C153B57088535ABC56D061FB54ADD36795CF5A,0AA1B08A949FA0743EE7175AF7ED5FCF315397FC,0F4ED090B49C9AB5DBF7F9AA4849F466B4F654CB,...,F86BCAFA29797916160A37F25ED8AA34B70C0FCE,F8FB7C510BF37E463D0BE09101BC29EE13CA8A71,FB74D423C8D75AAAFF822C60A44BB7E704820A3E,FC2D37EF721A256FD4C88CD1F5D3722C7192C047,FFA426C5EEC58E7630A0849F1039416B3C071AE8]]\ndata_sha1: [[02D3C3FB014B6E47C92C07B1C1114ED7B6CB1B71,26049ADA7E76BE753F46F0FD92A74950FAD49762,8C56B85BACA64B2C5DB196BFFE1A952B65B5A039,4B6181C1E38124BBED5DB64F3B74C6278531A824,0F4ED090B49C9AB5DBF7F9AA4849F466B4F654CB,...,7CBF24EED28CF959FAE9B1250518B59847643755,330BB80215A604659B5F3D897B26AE7C5C88E220,87581D5F0D58AF9E1A40DBDF2AF711C534A6AC81,1129391B5B703884860DC15E27386F9DC7A0B41B,37EEEADA6546F96D5EE506B435674B1568E5BAD8]]\ndbt_export_wins__wins_dataset_company_name: [[\"ETA Green Power Limited\",\"Med-Eq (Europe) Ltd\",\"Silver Lined Horizons Ltd\",\"Travelbee Ltd\",\"Hyde Sails\",...,\"AEROSERVICES LTD.\",\"Instarmac Group PLC\",\"Crush Creative Ltd.\",\"Stelfox UK Ltd\",\"Delf Freezer Wear Limited\"]]\ndbt_export_wins__wins_dataset_cdms_reference: [[\"Companies House ref: 12359858\",\"ORG-10109781\",\"ORG-10170829\",\"10010986\",\"00046947\",...,\"06403722\",\"01324925\",\"03610570\",\"ORG-10136650\",\"04368635\"]]" - }, - "metadata": {}, - "execution_count": 16 - } - ], - "source": [ - "%time\n", - "\n", - "df = cx.read_sql(\n", - " f\"postgresql://{os.environ['PGUSER']}:{os.environ['PGPASSWORD']}@{os.environ['PGHOST']}:{os.environ['PGPORT']}/{os.environ['PGDATABASE']}\", \n", - " compiled_bound.decode(\"utf-8\"), \n", - " return_type=\"arrow\"\n", - ")\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%time\n", - "\n", - "df = cx.read_sql(\n", - " f\"postgresql://{os.environ['PGUSER']}:{os.environ['PGPASSWORD']}@{os.environ['PGHOST']}:{os.environ['PGPORT']}/{os.environ['PGDATABASE']}\", \n", - " sql.sql(dialect=\"postgres\"), \n", - " return_type=\"polars\"\n", - ")\n", - "df.head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "WITH RECURSIVE source_data_unnested AS (\n SELECT\n _team_cmf.cmf__source_data.sha1 AS sha1,\n UNNEST(_team_cmf.cmf__source_data.id) AS id,\n _team_cmf.cmf__source_data.dataset AS dataset\n FROM _team_cmf.cmf__source_data\n), anon_1 AS (\n SELECT\n source_data_unnested.sha1 AS data_sha1,\n dbt.export_wins__wins_dataset.company_name AS dbt_export_wins__wins_dataset_company_name,\n dbt.export_wins__wins_dataset.cdms_reference AS dbt_export_wins__wins_dataset_cdms_reference\n FROM source_data_unnested\n LEFT OUTER JOIN dbt.export_wins__wins_dataset\n ON source_data_unnested.id = CAST(dbt.export_wins__wins_dataset.id AS VARCHAR)\n AND source_data_unnested.dataset = CAST(CAST('cc89099f-d065-49cc-aa45-e08e1db6653a' AS UUID) AS UUID)\n WHERE\n NOT dbt.export_wins__wins_dataset.id IS NULL\n), allowed AS (\n SELECT\n _team_cmf.cmf__ddupe_contains.parent AS parent,\n _team_cmf.cmf__ddupe_contains.child AS child\n FROM _team_cmf.cmf__ddupe_contains\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_1\n ON _team_cmf.cmf__ddupe_contains.parent = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 IN (CAST('\\x17a46e0ea365597922a07f6b7bb01b9956807b21' AS BYTEA))\n UNION\n SELECT\n _team_cmf.cmf__link_contains.parent AS parent,\n _team_cmf.cmf__link_contains.child AS child\n FROM _team_cmf.cmf__link_contains\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_1\n ON _team_cmf.cmf__link_contains.parent = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_2\n ON _team_cmf.cmf__link_contains.child = cmf__clusters_2.sha1\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 IN (CAST('\\x17a46e0ea365597922a07f6b7bb01b9956807b21' AS BYTEA))\n), root AS (\n SELECT\n allowed.parent AS parent,\n allowed.child AS child\n FROM allowed\n JOIN _team_cmf.cmf__clusters\n ON _team_cmf.cmf__clusters.sha1 = allowed.parent\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = _team_cmf.cmf__clusters.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 = CAST('\\x17a46e0ea365597922a07f6b7bb01b9956807b21' AS BYTEA)\n), recurse(parent, child) AS (\n SELECT\n root.parent AS parent,\n root.child AS child\n FROM root\n UNION\n SELECT\n recurse.parent AS parent,\n allowed.child AS child\n FROM allowed\n JOIN recurse\n ON allowed.parent = recurse.child\n)\nSELECT\n recurse.parent AS cluster_sha1,\n anon_1.data_sha1,\n anon_1.dbt_export_wins__wins_dataset_company_name,\n anon_1.dbt_export_wins__wins_dataset_cdms_reference\nFROM anon_1\nJOIN recurse\n ON recurse.child = anon_1.data_sha1\n" - } - ], - "source": [ - "print(sql.sql(dialect=\"postgres\", pretty=True))" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "WITH RECURSIVE \"allowed\" AS (\n SELECT\n \"_team_cmf\".\"cmf__ddupe_contains\".\"parent\" AS \"parent\",\n \"_team_cmf\".\"cmf__ddupe_contains\".\"child\" AS \"child\"\n FROM \"_team_cmf\".\"cmf__ddupe_contains\" AS \"cmf__ddupe_contains\"\n JOIN \"_team_cmf\".\"cmf__clusters\" AS \"cmf__clusters_1\"\n ON \"_team_cmf\".\"cmf__ddupe_contains\".\"parent\" = \"cmf__clusters_1\".\"sha1\"\n JOIN \"_team_cmf\".\"cmf__models_create_clusters\" AS \"cmf__models_create_clusters\"\n ON \"_team_cmf\".\"cmf__models_create_clusters\".\"child\" = \"cmf__clusters_1\".\"sha1\"\n JOIN \"_team_cmf\".\"cmf__models\" AS \"cmf__models\"\n ON \"_team_cmf\".\"cmf__models\".\"sha1\" = \"_team_cmf\".\"cmf__models_create_clusters\".\"parent\"\n AND \"_team_cmf\".\"cmf__models\".\"sha1\" IN (CAST('\\x17a46e0ea365597922a07f6b7bb01b9956807b21' AS BYTEA))\n UNION\n SELECT\n \"_team_cmf\".\"cmf__link_contains\".\"parent\" AS \"parent\",\n \"_team_cmf\".\"cmf__link_contains\".\"child\" AS \"child\"\n FROM \"_team_cmf\".\"cmf__link_contains\" AS \"cmf__link_contains\"\n JOIN \"_team_cmf\".\"cmf__clusters\" AS \"cmf__clusters_1\"\n ON \"_team_cmf\".\"cmf__link_contains\".\"parent\" = \"cmf__clusters_1\".\"sha1\"\n JOIN \"_team_cmf\".\"cmf__clusters\" AS \"cmf__clusters_2\"\n ON \"_team_cmf\".\"cmf__link_contains\".\"child\" = \"cmf__clusters_2\".\"sha1\"\n JOIN \"_team_cmf\".\"cmf__models_create_clusters\" AS \"cmf__models_create_clusters\"\n ON \"_team_cmf\".\"cmf__models_create_clusters\".\"child\" = \"cmf__clusters_1\".\"sha1\"\n JOIN \"_team_cmf\".\"cmf__models\" AS \"cmf__models\"\n ON \"_team_cmf\".\"cmf__models\".\"sha1\" = \"_team_cmf\".\"cmf__models_create_clusters\".\"parent\"\n AND \"_team_cmf\".\"cmf__models\".\"sha1\" IN (CAST('\\x17a46e0ea365597922a07f6b7bb01b9956807b21' AS BYTEA))\n), \"recurse\"(\"parent\", \"child\") AS (\n SELECT\n \"allowed\".\"parent\" AS \"parent\",\n \"allowed\".\"child\" AS \"child\"\n FROM \"allowed\" AS \"allowed\"\n JOIN \"_team_cmf\".\"cmf__clusters\" AS \"cmf__clusters\"\n ON \"_team_cmf\".\"cmf__clusters\".\"sha1\" = \"allowed\".\"parent\"\n JOIN \"_team_cmf\".\"cmf__models_create_clusters\" AS \"cmf__models_create_clusters\"\n ON \"_team_cmf\".\"cmf__clusters\".\"sha1\" = \"_team_cmf\".\"cmf__models_create_clusters\".\"child\"\n JOIN \"_team_cmf\".\"cmf__models\" AS \"cmf__models\"\n ON \"_team_cmf\".\"cmf__models\".\"sha1\" = \"_team_cmf\".\"cmf__models_create_clusters\".\"parent\"\n AND \"_team_cmf\".\"cmf__models\".\"sha1\" = CAST('\\x17a46e0ea365597922a07f6b7bb01b9956807b21' AS BYTEA)\n UNION\n SELECT\n \"recurse\".\"parent\" AS \"parent\",\n \"allowed\".\"child\" AS \"child\"\n FROM \"allowed\" AS \"allowed\"\n JOIN \"recurse\" AS \"recurse\"\n ON \"allowed\".\"parent\" = \"recurse\".\"child\"\n)\nSELECT\n \"recurse\".\"parent\" AS \"cluster_sha1\",\n \"_team_cmf\".\"cmf__source_data\".\"sha1\" AS \"data_sha1\",\n \"dbt\".\"export_wins__wins_dataset\".\"company_name\" AS \"dbt_export_wins__wins_dataset_company_name\",\n \"dbt\".\"export_wins__wins_dataset\".\"cdms_reference\" AS \"dbt_export_wins__wins_dataset_cdms_reference\"\nFROM \"_team_cmf\".\"cmf__source_data\" AS \"cmf__source_data\"\nLEFT JOIN \"dbt\".\"export_wins__wins_dataset\" AS \"export_wins__wins_dataset\"\n ON \"_team_cmf\".\"cmf__source_data\".\"dataset\" = CAST('cc89099f-d065-49cc-aa45-e08e1db6653a' AS UUID)\n AND UNNEST(\"_team_cmf\".\"cmf__source_data\".\"id\") = CAST(\"dbt\".\"export_wins__wins_dataset\".\"id\" AS VARCHAR)\nJOIN \"recurse\" AS \"recurse\"\n ON \"_team_cmf\".\"cmf__source_data\".\"sha1\" = \"recurse\".\"child\"\nWHERE\n NOT \"dbt\".\"export_wins__wins_dataset\".\"id\" IS NULL\n" - } - ], - "source": [ - "from sqlglot.optimizer import optimize\n", - "\n", - "optimised = optimize(\n", - " sql,\n", - " schema={\n", - " \"_team_cmf.cmf__source_data\": {\n", - " \"sha1\": \"BINARY\",\n", - " \"id\": \"STRING\",\n", - " \"dataset\": \"UUID\",\n", - " },\n", - " \"_team_cmf.cmf__clusters\": {\n", - " \"sha1\": \"BINARY\",\n", - " },\n", - " \"_team_cmf.cmf__models_create_clusters\": {\n", - " \"parent\": \"BINARY\",\n", - " \"child\": \"BINARY\",\n", - " },\n", - " \"_team_cmf.cmf__models\": {\n", - " \"sha1\": \"BINARY\",\n", - " },\n", - " \"_team_cmf.cmf__link_contains\": {\n", - " \"parent\": \"BINARY\",\n", - " \"child\": \"BINARY\",\n", - " },\n", - " \"_team_cmf.cmf__ddupe_contains\": {\n", - " \"parent\": \"BINARY\",\n", - " \"child\": \"BINARY\",\n", - " },\n", - " \"_team_cmf.cmf__clusters\": {\n", - " \"sha1\": \"BINARY\",\n", - " },\n", - " \"dbt.export_wins__wins_dataset\": {\n", - " \"id\": \"STRING\",\n", - " \"company_name\": \"STRING\",\n", - " \"cdms_reference\": \"STRING\",\n", - " }\n", - " }\n", - ")\n", - "\n", - "print(optimised.sql(dialect=\"postgres\", pretty=True))" - ] - }, - { - "source": [ - "# Companies House\n", - "\n", - "Should take forever -- timed out for me.\n", - "\n", - "And yet in PGAdmin, 2 mins. Wtf?!" - ], - "cell_type": "markdown", - "metadata": {} - }, - { - "source": [ - "model = \"naive_companies_house_v1\"\n", - "ch_selector = selector(\n", - " table=\"companieshouse.companies\",\n", - " fields=[\"company_name\", \"company_number\", \"postcode\"],\n", - ")\n", - "\n", - "# We want raw data with clusters attached\n", - "parent, child = _parent_to_tree(model, engine=ENGINE)\n", - "if len(parent) == 0:\n", - " raise ValueError(f\"Model {model} not found\")\n", - "tree = [parent] + child\n", - "reachable_stmt = _tree_to_reachable_stmt(tree)\n", - "lookup_stmt = _reachable_to_parent_data_stmt(reachable_stmt, parent)\n", - "data_stmt = _selector_to_data(ch_selector, engine=ENGINE).cte()\n", - "\n", - "final_stmt = select(lookup_stmt.c.parent.label(\"cluster_sha1\"), data_stmt).join(\n", - " lookup_stmt, lookup_stmt.c.child == data_stmt.c.data_sha1\n", - ")\n", - "\n", - "with ENGINE.connect() as conn:\n", - " cursor = conn.connection.cursor()\n", - " compiled = final_stmt.compile(\n", - " dialect=postgresql.dialect(),\n", - " compile_kwargs={\"render_postcompile\": True}\n", - " )\n", - " compiled_bound = cursor.mogrify(str(compiled), compiled.params)\n", - " sql = parse_one(compiled_bound.decode(\"utf-8\"))\n", - "\n", - "print(sql.sql(dialect=\"postgres\", pretty=True))" - ], - "cell_type": "code", - "metadata": { - "tags": [] - }, - "execution_count": 5, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "WITH RECURSIVE source_data_unnested AS (\n SELECT\n _team_cmf.cmf__source_data.sha1 AS sha1,\n UNNEST(_team_cmf.cmf__source_data.id) AS id,\n _team_cmf.cmf__source_data.dataset AS dataset\n FROM _team_cmf.cmf__source_data\n), anon_1 AS (\n SELECT\n source_data_unnested.sha1 AS data_sha1,\n companieshouse.companies.company_name AS companieshouse_companies_company_name,\n companieshouse.companies.company_number AS companieshouse_companies_company_number,\n companieshouse.companies.postcode AS companieshouse_companies_postcode\n FROM source_data_unnested\n LEFT OUTER JOIN companieshouse.companies\n ON source_data_unnested.id = CAST(companieshouse.companies.id AS VARCHAR)\n AND source_data_unnested.dataset = CAST(CAST('592b69e0-ce95-47a6-9f0a-bcd792f214a4' AS UUID) AS UUID)\n WHERE\n NOT companieshouse.companies.id IS NULL\n), allowed AS (\n SELECT\n _team_cmf.cmf__ddupe_contains.parent AS parent,\n _team_cmf.cmf__ddupe_contains.child AS child\n FROM _team_cmf.cmf__ddupe_contains\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_1\n ON _team_cmf.cmf__ddupe_contains.parent = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 IN (CAST('\\x5666a21720152c92b6b89be7d61e336d4ca684bf' AS BYTEA))\n UNION\n SELECT\n _team_cmf.cmf__link_contains.parent AS parent,\n _team_cmf.cmf__link_contains.child AS child\n FROM _team_cmf.cmf__link_contains\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_1\n ON _team_cmf.cmf__link_contains.parent = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__clusters AS cmf__clusters_2\n ON _team_cmf.cmf__link_contains.child = cmf__clusters_2.sha1\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = cmf__clusters_1.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 IN (CAST('\\x5666a21720152c92b6b89be7d61e336d4ca684bf' AS BYTEA))\n), root AS (\n SELECT\n allowed.parent AS parent,\n allowed.child AS child\n FROM allowed\n JOIN _team_cmf.cmf__clusters\n ON _team_cmf.cmf__clusters.sha1 = allowed.parent\n JOIN _team_cmf.cmf__models_create_clusters\n ON _team_cmf.cmf__models_create_clusters.child = _team_cmf.cmf__clusters.sha1\n JOIN _team_cmf.cmf__models\n ON _team_cmf.cmf__models_create_clusters.parent = _team_cmf.cmf__models.sha1\n WHERE\n _team_cmf.cmf__models.sha1 = CAST('\\x5666a21720152c92b6b89be7d61e336d4ca684bf' AS BYTEA)\n), recurse(parent, child) AS (\n SELECT\n root.parent AS parent,\n root.child AS child\n FROM root\n UNION\n SELECT\n recurse.parent AS parent,\n allowed.child AS child\n FROM allowed\n JOIN recurse\n ON allowed.parent = recurse.child\n)\nSELECT\n recurse.parent AS cluster_sha1,\n anon_1.data_sha1,\n anon_1.companieshouse_companies_company_name,\n anon_1.companieshouse_companies_company_number,\n anon_1.companieshouse_companies_postcode\nFROM anon_1\nJOIN recurse\n ON recurse.child = anon_1.data_sha1\n" - } - ] - }, - { - "source": [ - "Maybe let's try running this compiled SQL directly with SQLAlchemy." - ], - "cell_type": "markdown", - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "CPU times: user 2 µs, sys: 1 µs, total: 3 µs\nWall time: 4.77 µs\n 13915 function calls (12898 primitive calls) in 96.436 seconds\n\n Ordered by: cumulative time\n\n ncalls tottime percall cumtime percall filename:lineno(function)\n 1 0.000 0.000 96.429 96.429 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2245(execute)\n 1 0.000 0.000 96.429 96.429 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2078(_execute_internal)\n 1 0.000 0.000 96.429 96.429 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1377(execute)\n 1 0.000 0.000 96.429 96.429 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:505(_execute_on_connection)\n 1 0.000 0.000 96.429 96.429 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1593(_execute_clauseelement)\n 1 0.000 0.000 96.428 96.428 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1794(_execute_context)\n 1 0.000 0.000 96.428 96.428 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1853(_exec_single_context)\n 1 0.000 0.000 96.428 96.428 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:923(do_execute)\n 1 96.428 96.428 96.428 96.428 {method 'execute' of 'psycopg2.extensions.cursor' objects}\n 1 0.000 0.000 0.006 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:535(sql)\n 1 0.000 0.000 0.006 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/dialects/dialect.py:485(generate)\n 1 0.000 0.000 0.006 0.006 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:563(generate)\n 759/1 0.001 0.000 0.004 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:711(sql)\n 8/1 0.000 0.000 0.004 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/transforms.py:592(_to_sql)\n 8/1 0.000 0.000 0.004 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2182(select_sql)\n 129/12 0.000 0.000 0.004 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:3168(expressions)\n 112/13 0.000 0.000 0.004 0.000 {method 'join' of 'str' objects}\n 10/1 0.000 0.000 0.004 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1032(prepend_ctes)\n 1 0.000 0.000 0.004 0.004 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1038(with_sql)\n 13/6 0.000 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:3185()\n 5 0.000 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1048(cte_sql)\n 5 0.000 0.000 0.004 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:662(wrap)\n 10 0.000 0.000 0.003 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2125(query_modifiers)\n 1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:284(copy)\n 11/1 0.000 0.000 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/copy.py:128(deepcopy)\n 1 0.001 0.001 0.002 0.002 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:250(__deepcopy__)\n 2 0.000 0.000 0.002 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2314(union_sql)\n 2 0.000 0.000 0.002 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2290(set_operations)\n 10 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2150()\n 13 0.000 0.000 0.002 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1843(join_sql)\n 17/15 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:3120(binary)\n 15 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:3031(eq_sql)\n 53 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:770(column_sql)\n 2 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state_changes.py:95(_go)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1750(__exit__)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2423(close)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2525(_close_impl)\n 188 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:777()\n 1 0.000 0.000 0.001 0.001 :1(close)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1346(close)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2577(close)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2726(_do_close)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2710(_close_impl)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2704(_connection_rollback_impl)\n 1 0.000 0.000 0.001 0.001 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1116(_rollback_impl)\n 2 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:691(do_rollback)\n 2 0.001 0.000 0.001 0.000 {method 'rollback' of 'psycopg2.extensions.connection' objects}\n 198 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1249(identifier_sql)\n 6/5 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2797(cast_sql)\n 18 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2724(alias_sql)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/dialects/postgres.py:117(_datatype_sql)\n 389 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:96(__init__)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:3948(is_type)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:3902(build)\n 21 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1619(table_sql)\n 4 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2355(where_sql)\n 6 0.000 0.000 0.001 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/__init__.py:98(parse_one)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2761(and_sql)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2767(connector_sql)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/dialects/dialect.py:480(parse_into)\n 768 0.000 0.000 0.000 0.000 {method 'items' of 'dict' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2655(in_sql)\n 21 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1608(table_parts)\n 331 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:318(set)\n 56 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1609()\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/parser.py:1149(parse_into)\n 1808 0.000 0.000 0.000 0.000 {built-in method builtins.hasattr}\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1788(from_sql)\n 250 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:146(text)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/parser.py:1185(_parse)\n 1990 0.000 0.000 0.000 0.000 {method 'get' of 'dict' objects}\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/dialects/dialect.py:494(tokenize)\n 1616 0.000 0.000 0.000 0.000 {built-in method builtins.isinstance}\n 198 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:202(name)\n 436 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:333(_set_parent)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/parser.py:588()\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/tokens.py:945(tokenize)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2039(_connection_for_bind)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/tokens.py:964(_scan)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/parser.py:3903(_parse_types)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1806(_setup_result_proxy)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1876(_setup_dml_or_text_result)\n 1 0.000 0.000 0.000 0.000 :1(_connection_for_bind)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/tokens.py:1083(_scan_keywords)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1107(_connection_for_bind)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1419(__init__)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1052(tablealias_sql)\n 198 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:1844(quoted)\n 396 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:631(maybe_comment)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/tokens.py:1299(_scan_var)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:3256(connect)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/_elements_constructors.py:1565(text)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:1482(_init_metadata)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:131(__init__)\n 45 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:305(append)\n 479 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:2286(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:347(__init__)\n 1 0.000 0.000 0.000 0.000 {method 'sub' of 're.Pattern' objects}\n 26 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:2047(kind)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1221(close)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/transforms.py:382(eliminate_semi_and_anti_joins)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1507(close)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1384(_checkin)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:3281(raw_connection)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:496(_merge_cursor_description)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:917(_finalize_fairy)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:441(connect)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:625()\n 376 0.000 0.000 0.000 0.000 {built-in method builtins.getattr}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1255(_checkout)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1445(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2721(not_sql)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:667(_compile_w_cache)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2533(expunge_all)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1276(_init_compiled)\n 20 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/dialects/dialect.py:296(get_or_raise)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2703(anonymous_sql)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/dialects/dialect.py:503(parser)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/tokens.py:1008(_advance)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:707(checkout)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:3055(is_sql)\n 385 0.000 0.000 0.000 0.000 {method 'pop' of 'list' objects}\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/dialects/dialect.py:497(tokenizer)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/tokens.py:1048(_add)\n 198 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/dialects/dialect.py:426(can_identify)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:3143(func)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1110(datatype_sql)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/ipkernel.py:770(_clean_thread_parent_frames)\n 198 0.000 0.000 0.000 0.000 {method 'lower' of 'str' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:3152(format_args)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1814(_autobegin_t)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/parser.py:1106(__init__)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/helper.py:106(csv)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:751(checkin)\n 377 0.000 0.000 0.000 0.000 {built-in method builtins.callable}\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:2051(side)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/parser.py:1305(_advance)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:3153()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1264(oneshot)\n 13 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:2043(method)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:880(__init__)\n 202 0.000 0.000 0.000 0.000 {method 'replace' of 'str' objects}\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:437(__get__)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/tokens.py:917(__init__)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2158()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:451(_return_conn)\n 204 0.000 0.000 0.000 0.000 {method 'isdigit' of 'str' objects}\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/parser.py:5992(_match_set)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/impl.py:144(_do_return_conn)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:411(_generate_cache_key)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:794(_merge_cols_by_none)\n 51 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:125(this)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2177(after_limit_modifiers)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/dialects/dialect.py:506(generator)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:347(_generate_cache_key)\n 43 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:774(__hash__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:132(put)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/impl.py:153(_do_get)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/state.py:459(_detach_states)\n 20 0.000 0.000 0.000 0.000 :1033(_handle_fromlist)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:623(seg)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1757(get_result_processor)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1980(literal_sql)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:648(_colnames_from_description)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:520(__init__)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/encodings/utf_8.py:15(decode)\n 11 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/copy.py:242(_keep_alive)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/parser.py:6029(_match_text_seq)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:174(get)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1719(create_cursor)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2169(offset_limit_modifiers)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:183(_for_instance)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:159(__getattr__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:314(expect)\n 5 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/type_api.py:916(_cached_result_processor)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1029(_take_snapshot)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:165(__setitem__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:798(begin)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1740(create_default_cursor)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/tokens.py:930(reset)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:180(_for_class)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1388(enumerate)\n 24 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/helper.py:47(seq_get)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/parser.py:1121(reset)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/cache_key.py:221(_gen_cache_key)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1483(cursor)\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:365()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2679(__init__)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:415(__getitem__)\n 45 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/helper.py:117()\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/copy.py:200(_deepcopy_list)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/ipykernel/ipkernel.py:785()\n 77 0.000 0.000 0.000 0.000 {method 'upper' of 'str' objects}\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/types.py:171(__get__)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:137(__init__)\n 50 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:620(sep)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/dialects/dialect.py:360(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:526(get)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2701(_connection_begin_impl)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/transforms.py:86(eliminate_qualify)\n 1 0.000 0.000 0.000 0.000 {method 'cursor' of 'psycopg2.extensions.connection' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1398(_reset)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/contextlib.py:123(__exit__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1424(_next)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:43(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:205(_effective_processors)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:368(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1094(_begin_impl)\n 30 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:688(indent)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:256(__enter__)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/helper.py:63(ensure_list)\n 18 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/tokens.py:1044(_text)\n 43 0.000 0.000 0.000 0.000 {built-in method builtins.id}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:348(__new__)\n 2 0.000 0.000 0.000 0.000 {built-in method builtins.next}\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/trie.py:43(in_trie)\n 8 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:389(__bool__)\n 43 0.000 0.000 0.000 0.000 {built-in method builtins.hash}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1379()\n 15 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1849()\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:367()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py:567(post_exec)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:187(_join)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1223(_set_memoized_attribute)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/parser.py:1220(check_errors)\n 30 0.000 0.000 0.000 0.000 {built-in method builtins.len}\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/tokens.py:409(__init__)\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:1986(escape_str)\n 6 0.000 0.000 0.000 0.000 {built-in method _codecs.utf_8_decode}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:182(_make_key_to_index)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:351(notify)\n 1 0.000 0.000 0.000 0.000 /home/theia/company-matching/cmf/data/utils/db.py:165(sqa_profiled)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:535(_still_open_and_dbapi_connection_is_valid)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:259(all_states)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/log.py:101(_should_log_debug)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:819(get_connection)\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:139(expressions)\n 22 0.000 0.000 0.000 0.000 {method 'strip' of 'str' objects}\n 7 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:1109(ident)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:1137(__get__)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:431(__getitem__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:228(_put)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2317(union_op)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/attr.py:374(__call__)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/enum.py:792(value)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/parser.py:1317(_retreat)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/dialects/dialect.py:95(get)\n 1 0.000 0.000 0.000 0.000 {method 'issuperset' of 'frozenset' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/compiler.py:1852(construct_params)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/threading.py:259(__exit__)\n 14 0.000 0.000 0.000 0.000 {method 'values' of 'dict' objects}\n 4 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:160(is_string)\n 30 0.000 0.000 0.000 0.000 {method 'isalnum' of 'str' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py:570(_log_notices)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1487(__getattr__)\n 12 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/parser.py:5980(_match)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1225(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:225(_full)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:231(_get)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/log.py:104(_should_log_info)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1602(executemany)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/logging/__init__.py:1689(isEnabledFor)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:597(preprocess)\n 1 0.000 0.000 0.000 0.000 :1(_generated_cache_key_traversal)\n 2 0.000 0.000 0.000 0.000 {method '__enter__' of '_thread.RLock' objects}\n 10 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2153()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/identity.py:48(_kill)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:470()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:570(connection)\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/expressions.py:132(expression)\n 6 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/parser.py:6003(_match_pair)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1010(_iterate_self_and_parents)\n 1 0.000 0.000 0.000 0.000 {built-in method builtins.sorted}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/weakref.py:353(__init__)\n 8 0.000 0.000 0.000 0.000 {built-in method builtins.setattr}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/event/base.py:394(__init__)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/dialects/dialect.py:323()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/result.py:185()\n 3 0.000 0.000 0.000 0.000 {built-in method __new__ of type object at 0x562f7007e380}\n 12 0.000 0.000 0.000 0.000 {method 'isspace' of 'str' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:2632(get_bind)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:516(_inc_counter)\n 1 0.000 0.000 0.000 0.000 :1()\n 2 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/util.py:105(_trans_ctx_check)\n 1 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/cursor.py:388()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/pool/base.py:1445(is_valid)\n 3 0.000 0.000 0.000 0.000 {method '__exit__' of '_thread.RLock' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/_collections.py:131(coerce_to_immutabledict)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:681(normalize_func)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1641(no_parameters)\n 2 0.000 0.000 0.000 0.000 {method '_is_owned' of '_thread.RLock' objects}\n 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2689(_deactivate_from_connection)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/queue.py:222(_empty)\n 3 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/typing.py:1375(cast)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1041(in_transaction)\n 1 0.000 0.000 0.000 0.000 {method '__exit__' of '_thread.lock' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:688(do_begin)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:980(_is_transaction_boundary)\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1747(__enter__)\n 4 0.000 0.000 0.000 0.000 {method 'keys' of 'dict' objects}\n 1 0.000 0.000 0.000 0.000 {method 'split' of 'str' objects}\n 1 0.000 0.000 0.000 0.000 {method 'append' of 'collections.deque' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/generator.py:2007(null_sql)\n 1 0.000 0.000 0.000 0.000 {method 'popleft' of 'collections.deque' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1491()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlglot/dialects/dialect.py:323()\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/coercions.py:1152(_post_coercion)\n 1 0.000 0.000 0.000 0.000 {method 'setdefault' of 'dict' objects}\n 1 0.000 0.000 0.000 0.000 /opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1746(pre_exec)\n 1 0.000 0.000 0.000 0.000 {built-in method builtins.issubclass}\n\n\n\n" - } - ], - "source": [ - "%time\n", - "\n", - "from sqlalchemy.orm import Session\n", - "from sqlalchemy import text\n", - "\n", - "with sqa_profiled():\n", - " with Session(ENGINE) as session:\n", - " res = session.execute(text(sql.sql(dialect=\"postgres\")))" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": "(, , 'ARCADE GEEKS INT LTD', '13231865', 'DY13 9RH')" - }, - "metadata": {}, - "execution_count": 12 - } - ], - "source": [ - "res.first()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.16 64-bit ('company_matching': conda)", - "language": "python", - "name": "python_defaultSpec_1711550197230" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16-final" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file diff --git a/notebooks/engineering/WL_selector.ipynb b/notebooks/engineering/WL_selector.ipynb deleted file mode 100644 index 8bf1d5f..0000000 --- a/notebooks/engineering/WL_selector.ipynb +++ /dev/null @@ -1,108 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "d8019f5c-5446-46fa-90d3-b5db28541001", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame, display\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "6c751528-6238-4f29-a9e4-79bf167d8308", - "metadata": {}, - "outputs": [], - "source": [ - "from cmf.data import ENGINE, SourceDataset\n", - "from cmf.data.utils import get_schema_table_names, string_to_table" - ] - }, - { - "cell_type": "markdown", - "id": "f701e3de-ee2a-4a61-b764-af9d3f34e91b", - "metadata": {}, - "source": [ - "# Testing selectors\n", - "\n", - "An area to adapt and test." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "d0b53b75-4d66-4977-92c0-15c837ada7f1", - "metadata": {}, - "outputs": [], - "source": [ - "table=\"companieshouse.companies\"\n", - "db_schema, db_table = get_schema_table_names(table, validate=True)\n", - "selected_table = string_to_table(\n", - " db_schema=db_schema,\n", - " db_table=db_table,\n", - " engine=ENGINE\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "10082b7c-3b96-46ba-aefa-9f25bdc3a225", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'companieshouse'" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "'companies'" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selected_table.schema\n", - "selected_table.name" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/models/WL_deduper.ipynb b/notebooks/models/WL_deduper.ipynb deleted file mode 100644 index ebc1d8e..0000000 --- a/notebooks/models/WL_deduper.ipynb +++ /dev/null @@ -1,1590 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "4b9d18fd-bb52-415b-871c-728626594c00", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "25dd6661-910a-4d07-8149-cff950b0a208", - "metadata": {}, - "outputs": [], - "source": [ - "import uuid\n", - "from pathlib import Path\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "\n", - "import cmf.locations as loc\n", - "from cmf import make_deduper, process, query\n", - "from cmf.clean import company_name, company_number\n", - "from cmf.dedupers import Naive\n", - "from cmf.helpers import cleaner, cleaners\n", - "\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "b54f6f8f-32b1-45b7-ab4b-646b4d4e1ccb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'{ \"id\": \"data_sha1\", \"unique_fields\": [ \"a\", \"b\", ] }'" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - }, - { - "ename": "JSONDecodeError", - "evalue": "Expecting value: line 10 column 9 (char 150)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[39], line 19\u001b[0m\n\u001b[1;32m 6\u001b[0m template \u001b[38;5;241m=\u001b[39m Template(\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124m \u001b[39m\u001b[38;5;124m{\u001b[39m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mid\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata_sha1\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m,\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;124m }\u001b[39m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;124m\"\"\"\u001b[39m)\n\u001b[1;32m 17\u001b[0m template\u001b[38;5;241m.\u001b[39mrender(fields\u001b[38;5;241m=\u001b[39mfields)\u001b[38;5;241m.\u001b[39mstrip()\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 19\u001b[0m \u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtemplate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrender\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfields\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfields\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/json/__init__.py:346\u001b[0m, in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 341\u001b[0m s \u001b[38;5;241m=\u001b[39m s\u001b[38;5;241m.\u001b[39mdecode(detect_encoding(s), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msurrogatepass\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 344\u001b[0m parse_int \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m parse_float \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 345\u001b[0m parse_constant \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_pairs_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kw):\n\u001b[0;32m--> 346\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_default_decoder\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 347\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 348\u001b[0m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m JSONDecoder\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/json/decoder.py:337\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecode\u001b[39m(\u001b[38;5;28mself\u001b[39m, s, _w\u001b[38;5;241m=\u001b[39mWHITESPACE\u001b[38;5;241m.\u001b[39mmatch):\n\u001b[1;32m 333\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Return the Python representation of ``s`` (a ``str`` instance\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[38;5;124;03m containing a JSON document).\u001b[39;00m\n\u001b[1;32m 335\u001b[0m \n\u001b[1;32m 336\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 337\u001b[0m obj, end \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraw_decode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43midx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_w\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 338\u001b[0m end \u001b[38;5;241m=\u001b[39m _w(s, end)\u001b[38;5;241m.\u001b[39mend()\n\u001b[1;32m 339\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m end \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(s):\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/json/decoder.py:355\u001b[0m, in \u001b[0;36mJSONDecoder.raw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m 353\u001b[0m obj, end \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscan_once(s, idx)\n\u001b[1;32m 354\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m--> 355\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m JSONDecodeError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpecting value\u001b[39m\u001b[38;5;124m\"\u001b[39m, s, err\u001b[38;5;241m.\u001b[39mvalue) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 356\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj, end\n", - "\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 10 column 9 (char 150)" - ] - } - ], - "source": [ - "from jinja2 import Template\n", - "import json\n", - "\n", - "fields = [\"a\", \"b\"]\n", - "\n", - "template = Template(\"\"\"\n", - " {\n", - " \"id\": \"data_sha1\",\n", - " \"unique_fields\": [\n", - " {% for field in fields %}\n", - " \"{{ field }}\",\n", - " {% endfor %}\n", - " ]\n", - " }\n", - "\"\"\")\n", - "\n", - "template.render(fields=fields).strip().replace(\"\\n\", \"\")\n", - "\n", - "json.loads(template.render(fields=fields))" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "d06826cf-60d4-4916-9e82-8358f8f3cb1f", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_csv(Path(loc.TEST, \"data\", \"all_companies.csv\")).reset_index(names=\"id\")\n", - "df[\"id\"] = df[\"id\"].apply(lambda x: uuid.UUID(int=x))" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "id": "8b28fada-34b3-476e-af41-78c44f85e937", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namecrn
000000000-0000-0000-0000-000000000000People Limited01HHGX9BHARZT77WHVWCYJSWSF
000000000-0000-0000-0000-0000000003e8People UK01HHGX9BHARZT77WHVWCYJSWSF
000000000-0000-0000-0000-0000000007d0People Company01HHGX9BHARZT77WHVWCYJSWSF
\n", - "
" - ], - "text/plain": [ - " id company_name \\\n", - "0 00000000-0000-0000-0000-000000000000 People Limited \n", - "0 00000000-0000-0000-0000-0000000003e8 People UK \n", - "0 00000000-0000-0000-0000-0000000007d0 People Company \n", - "\n", - " crn \n", - "0 01HHGX9BHARZT77WHVWCYJSWSF \n", - "0 01HHGX9BHARZT77WHVWCYJSWSF \n", - "0 01HHGX9BHARZT77WHVWCYJSWSF " - ] - }, - "execution_count": 77, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_raw = df.filter([\"company_name\", \"crn\"])\n", - "df_crn = pd.concat(\n", - " [\n", - " df_raw.assign(company_name=lambda df: df[\"company_name\"] + \" Limited\"),\n", - " df_raw.assign(company_name=lambda df: df[\"company_name\"] + \" UK\"),\n", - " df_raw.assign(company_name=lambda df: df[\"company_name\"] + \" Company\"),\n", - " ]\n", - ")\n", - "\n", - "df_crn[\"id\"] = range(df_crn.shape[0])\n", - "df_crn = df_crn.filter([\"id\", \"company_name\", \"crn\"])\n", - "df_crn[\"id\"] = df_crn[\"id\"].apply(lambda x: uuid.UUID(int=x))\n", - "df_crn.query(\"company_name.str.lower().str.contains('people')\")" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "id": "cfc2eb82-90fa-4ab3-beee-f18385d8e5fc", - "metadata": {}, - "outputs": [], - "source": [ - "# Clean\n", - "cleaner_name = cleaner(\n", - " function=company_name, arguments={\"column\": \"company_name\"}\n", - ")\n", - "cleaner_crn = cleaners(cleaner_name)\n", - "\n", - "df_cleaned = process(data=df_crn, pipeline=cleaner_crn)" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "id": "57af58ff-bd1b-44eb-a0cb-6de152974134", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1000" - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_cleaned[[\"company_name\", \"crn\"]].drop_duplicates().shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "id": "dfb82f95-0078-4f57-a202-29d9e4979885", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/jovyan/company-matching/cmf/dedupers/make_deduper.py:22: UserWarning: For offline deduplication, the ID can be any field. \n", - "\n", - "When deduplicating to write back to the Company Matching Framework database, the ID must be data_sha1, generated by retrieving data with cmf.query().\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "# Dedupe\n", - "df_naive_deduper = make_deduper(\n", - " dedupe_run_name=\"basic_crn\",\n", - " description=\"Clean company name, company number\",\n", - " deduper=Naive,\n", - " deduper_settings={\n", - " \"id\": \"id\",\n", - " \"unique_fields\": [\"company_name\", \"crn\"],\n", - " },\n", - " data_source=\"foo\",\n", - " data=df_cleaned,\n", - ")\n", - "\n", - "df_deduped = df_naive_deduper()\n", - "\n", - "df_deduped_df = df_deduped.to_df()" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "id": "b5a2f840-a6c7-49a9-8b8f-db72ba36ebeb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3000" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
modelleftleft_idrightright_idprobability
0basic_crnfoo00000000-0000-0000-0000-000000000001foo00000000-0000-0000-0000-0000000007d11
1basic_crnfoo00000000-0000-0000-0000-000000000002foo00000000-0000-0000-0000-0000000007d21
2basic_crnfoo00000000-0000-0000-0000-000000000005foo00000000-0000-0000-0000-0000000007d51
\n", - "
" - ], - "text/plain": [ - " model left left_id right \\\n", - "0 basic_crn foo 00000000-0000-0000-0000-000000000001 foo \n", - "1 basic_crn foo 00000000-0000-0000-0000-000000000002 foo \n", - "2 basic_crn foo 00000000-0000-0000-0000-000000000005 foo \n", - "\n", - " right_id probability \n", - "0 00000000-0000-0000-0000-0000000007d1 1 \n", - "1 00000000-0000-0000-0000-0000000007d2 1 \n", - "2 00000000-0000-0000-0000-0000000007d5 1 " - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_deduped_df.shape[0]\n", - "df_deduped_df.head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "id": "d38bc203-0383-4874-aa9e-83aa261d487f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
left_idright_idprobability
000000000-0000-0000-0000-00000000000100000000-0000-0000-0000-0000000007d11
100000000-0000-0000-0000-00000000000200000000-0000-0000-0000-0000000007d21
200000000-0000-0000-0000-00000000000500000000-0000-0000-0000-0000000007d51
300000000-0000-0000-0000-00000000000b00000000-0000-0000-0000-0000000007db1
400000000-0000-0000-0000-00000000000c00000000-0000-0000-0000-0000000007dc1
............
299500000000-0000-0000-0000-0000000003a000000000-0000-0000-0000-0000000007881
299600000000-0000-0000-0000-0000000003d100000000-0000-0000-0000-0000000007b91
299700000000-0000-0000-0000-00000000039a00000000-0000-0000-0000-000000000b6a1
299800000000-0000-0000-0000-00000000078200000000-0000-0000-0000-000000000b6a1
299900000000-0000-0000-0000-00000000039a00000000-0000-0000-0000-0000000007821
\n", - "

3000 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " left_id \\\n", - "0 00000000-0000-0000-0000-000000000001 \n", - "1 00000000-0000-0000-0000-000000000002 \n", - "2 00000000-0000-0000-0000-000000000005 \n", - "3 00000000-0000-0000-0000-00000000000b \n", - "4 00000000-0000-0000-0000-00000000000c \n", - "... ... \n", - "2995 00000000-0000-0000-0000-0000000003a0 \n", - "2996 00000000-0000-0000-0000-0000000003d1 \n", - "2997 00000000-0000-0000-0000-00000000039a \n", - "2998 00000000-0000-0000-0000-000000000782 \n", - "2999 00000000-0000-0000-0000-00000000039a \n", - "\n", - " right_id probability \n", - "0 00000000-0000-0000-0000-0000000007d1 1 \n", - "1 00000000-0000-0000-0000-0000000007d2 1 \n", - "2 00000000-0000-0000-0000-0000000007d5 1 \n", - "3 00000000-0000-0000-0000-0000000007db 1 \n", - "4 00000000-0000-0000-0000-0000000007dc 1 \n", - "... ... ... \n", - "2995 00000000-0000-0000-0000-000000000788 1 \n", - "2996 00000000-0000-0000-0000-0000000007b9 1 \n", - "2997 00000000-0000-0000-0000-000000000b6a 1 \n", - "2998 00000000-0000-0000-0000-000000000b6a 1 \n", - "2999 00000000-0000-0000-0000-000000000782 1 \n", - "\n", - "[3000 rows x 3 columns]" - ] - }, - "execution_count": 82, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_deduped.dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "id": "301b1354-5b03-4bde-98fc-f917d2fcc05d", - "metadata": {}, - "outputs": [], - "source": [ - "df_enriched = df_deduped.inspect_with_source(\n", - " left_data=df_cleaned, left_key=\"id\", right_data=df_cleaned, right_key=\"id\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "id": "97d93ced-1a26-48e6-8675-c9dccc4057d5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
left_idright_idcompany_name_xcrn_xcompany_name_ycrn_y
74500000000-0000-0000-0000-00000000000000000000-0000-0000-0000-0000000007d0people01HHGX9BHARZT77WHVWCYJSWSFpeople01HHGX9BHARZT77WHVWCYJSWSF
198900000000-0000-0000-0000-00000000000000000000-0000-0000-0000-0000000003e8people01HHGX9BHARZT77WHVWCYJSWSFpeople01HHGX9BHARZT77WHVWCYJSWSF
247700000000-0000-0000-0000-0000000003e800000000-0000-0000-0000-0000000007d0people01HHGX9BHARZT77WHVWCYJSWSFpeople01HHGX9BHARZT77WHVWCYJSWSF
\n", - "
" - ], - "text/plain": [ - " left_id \\\n", - "745 00000000-0000-0000-0000-000000000000 \n", - "1989 00000000-0000-0000-0000-000000000000 \n", - "2477 00000000-0000-0000-0000-0000000003e8 \n", - "\n", - " right_id company_name_x \\\n", - "745 00000000-0000-0000-0000-0000000007d0 people \n", - "1989 00000000-0000-0000-0000-0000000003e8 people \n", - "2477 00000000-0000-0000-0000-0000000007d0 people \n", - "\n", - " crn_x company_name_y crn_y \n", - "745 01HHGX9BHARZT77WHVWCYJSWSF people 01HHGX9BHARZT77WHVWCYJSWSF \n", - "1989 01HHGX9BHARZT77WHVWCYJSWSF people 01HHGX9BHARZT77WHVWCYJSWSF \n", - "2477 01HHGX9BHARZT77WHVWCYJSWSF people 01HHGX9BHARZT77WHVWCYJSWSF " - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_enriched.query(\"company_name_x == 'people'\")" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "id": "31940ecd-c5f8-418b-9070-89f1f1940783", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
left_idright_idleft_uniqueright_uniqueprobability
000000000-0000-0000-0000-00000000000000000000-0000-0000-0000-000000000000020001
100000000-0000-0000-0000-00000000000300000000-0000-0000-0000-000000000003320031
200000000-0000-0000-0000-00000000000600000000-0000-0000-0000-000000000006620061
300000000-0000-0000-0000-00000000000800000000-0000-0000-0000-000000000008820081
400000000-0000-0000-0000-00000000000f00000000-0000-0000-0000-00000000000f1520151
..................
99500000000-0000-0000-0000-00000000030f00000000-0000-0000-0000-00000000030f78327831
99600000000-0000-0000-0000-00000000034200000000-0000-0000-0000-00000000034283428341
99700000000-0000-0000-0000-00000000036700000000-0000-0000-0000-00000000036787128711
99800000000-0000-0000-0000-00000000037200000000-0000-0000-0000-00000000037288228821
99900000000-0000-0000-0000-0000000003c300000000-0000-0000-0000-0000000003c396329631
\n", - "

1000 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " left_id \\\n", - "0 00000000-0000-0000-0000-000000000000 \n", - "1 00000000-0000-0000-0000-000000000003 \n", - "2 00000000-0000-0000-0000-000000000006 \n", - "3 00000000-0000-0000-0000-000000000008 \n", - "4 00000000-0000-0000-0000-00000000000f \n", - ".. ... \n", - "995 00000000-0000-0000-0000-00000000030f \n", - "996 00000000-0000-0000-0000-000000000342 \n", - "997 00000000-0000-0000-0000-000000000367 \n", - "998 00000000-0000-0000-0000-000000000372 \n", - "999 00000000-0000-0000-0000-0000000003c3 \n", - "\n", - " right_id left_unique right_unique \\\n", - "0 00000000-0000-0000-0000-000000000000 0 2000 \n", - "1 00000000-0000-0000-0000-000000000003 3 2003 \n", - "2 00000000-0000-0000-0000-000000000006 6 2006 \n", - "3 00000000-0000-0000-0000-000000000008 8 2008 \n", - "4 00000000-0000-0000-0000-00000000000f 15 2015 \n", - ".. ... ... ... \n", - "995 00000000-0000-0000-0000-00000000030f 783 2783 \n", - "996 00000000-0000-0000-0000-000000000342 834 2834 \n", - "997 00000000-0000-0000-0000-000000000367 871 2871 \n", - "998 00000000-0000-0000-0000-000000000372 882 2882 \n", - "999 00000000-0000-0000-0000-0000000003c3 963 2963 \n", - "\n", - " probability \n", - "0 1 \n", - "1 1 \n", - "2 1 \n", - "3 1 \n", - "4 1 \n", - ".. ... \n", - "995 1 \n", - "996 1 \n", - "997 1 \n", - "998 1 \n", - "999 1 \n", - "\n", - "[1000 rows x 5 columns]" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import duckdb\n", - "\n", - "df_cleaned_2 = df_cleaned.copy()\n", - "\n", - "join_clause = []\n", - "for field in [\"company_name\", \"crn\"]:\n", - " join_clause.append(f\"l.{field} = r.{field}\")\n", - "join_clause_compiled = \" and \".join(join_clause)\n", - "\n", - "df_cleaned_2[\"_unique_e4003b\"] = range(df_cleaned_2.shape[0])\n", - "\n", - "duckdb.sql(\n", - " f\"\"\"\n", - " select distinct on (list_sort([raw.left_id, raw.right_id]))\n", - " raw.left_id,\n", - " raw.right_id,\n", - " raw.left_unique,\n", - " raw.right_unique,\n", - " 1 as probability\n", - " from (\n", - " select\n", - " l.id as left_id,\n", - " r.id as right_id,\n", - " l._unique_e4003b as left_unique,\n", - " r._unique_e4003b as right_unique\n", - " from\n", - " df_cleaned_2 l\n", - " inner join df_cleaned_2 r on\n", - " (\n", - " {join_clause_compiled}\n", - " )\n", - " ) raw;\n", - "\"\"\"\n", - ").df()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "f3054087-2e5e-4941-90b8-5fd13922e433", - "metadata": {}, - "outputs": [], - "source": [ - "# Select\n", - "dh = query(\n", - " selector={\n", - " \"dit.data_hub__companies\": [\n", - " \"id\",\n", - " \"name\",\n", - " \"company_number\",\n", - " ]\n", - " },\n", - " model=None,\n", - " return_type=\"pandas\",\n", - ")\n", - "\n", - "# Clean\n", - "col_prefix = \"dit_data_hub__companies_\"\n", - "\n", - "cleaner_name = cleaner(function=company_name, arguments={\"column\": f\"{col_prefix}name\"})\n", - "cleaner_crn = cleaner(\n", - " function=company_number, arguments={\"column\": f\"{col_prefix}company_number\"}\n", - ")\n", - "cleaner_name_dh = cleaners(cleaner_name, cleaner_crn)\n", - "\n", - "dh_cleaned = process(data=dh, pipeline=cleaner_name_dh)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "be039ed3-f615-4b04-ac34-432e6d21d325", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(503449, 4)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "(503449, 4)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dh.shape\n", - "dh_cleaned.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "4458d1df-7366-416b-833b-d7f7dbdc04be", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
data_sha1dit_data_hub__companies_iddit_data_hub__companies_namedit_data_hub__companies_company_number
0[196, 247, 190, 128, 184, 190, 103, 122, 20, 4...00002c8e-591a-e711-88ee-e4115bead28aarensis corpNone
1[24, 61, 93, 182, 46, 163, 186, 32, 56, 37, 47...000042c1-a098-e211-a939-e4115bead28amacrogen koreaNone
2[88, 139, 37, 72, 135, 153, 140, 176, 249, 217...00008a29-e155-e411-985c-e4115bead28apixsan digital softwareNone
\n", - "
" - ], - "text/plain": [ - " data_sha1 \\\n", - "0 [196, 247, 190, 128, 184, 190, 103, 122, 20, 4... \n", - "1 [24, 61, 93, 182, 46, 163, 186, 32, 56, 37, 47... \n", - "2 [88, 139, 37, 72, 135, 153, 140, 176, 249, 217... \n", - "\n", - " dit_data_hub__companies_id dit_data_hub__companies_name \\\n", - "0 00002c8e-591a-e711-88ee-e4115bead28a arensis corp \n", - "1 000042c1-a098-e211-a939-e4115bead28a macrogen korea \n", - "2 00008a29-e155-e411-985c-e4115bead28a pixsan digital software \n", - "\n", - " dit_data_hub__companies_company_number \n", - "0 None \n", - "1 None \n", - "2 None " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dh_cleaned.head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "a59f4804-4f2e-4775-9c84-b0271c9e3f53", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "482602" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dh_cleaned[\n", - " [\"dit_data_hub__companies_name\", \"dit_data_hub__companies_company_number\"]\n", - "].drop_duplicates().shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "d0ff4415-6066-4627-a92c-d21baa06767b", - "metadata": {}, - "outputs": [], - "source": [ - "# Dedupe\n", - "dh_naive_deduper = make_deduper(\n", - " dedupe_run_name=\"basic_dh\",\n", - " description=\"\"\"\n", - " Clean company name, company number\n", - " \"\"\",\n", - " deduper=Naive,\n", - " deduper_settings={\n", - " \"id\": f\"data_sha1\",\n", - " \"unique_fields\": [f\"{col_prefix}name\", f\"{col_prefix}company_number\"],\n", - " },\n", - " data_source=\"dit.data_hub__companies\",\n", - " data=dh_cleaned,\n", - ")\n", - "\n", - "dh_deduped = dh_naive_deduper()\n", - "\n", - "dh_deduped_df = dh_deduped.to_df()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "48a7cfed-9a89-40b0-b4fc-47310e2d66de", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
modelleftleft_idrightright_idprobability
0basic_dhdit.data_hub__companies[159, 88, 93, 114, 229, 226, 159, 80, 204, 168...dit.data_hub__companies[134, 155, 152, 206, 81, 64, 135, 99, 204, 197...1
1basic_dhdit.data_hub__companies[5, 206, 209, 57, 155, 53, 4, 205, 147, 11, 13...dit.data_hub__companies[9, 74, 19, 18, 34, 88, 59, 107, 19, 210, 37, ...1
2basic_dhdit.data_hub__companies[85, 19, 108, 225, 134, 92, 105, 217, 244, 86,...dit.data_hub__companies[231, 248, 107, 96, 178, 253, 194, 43, 216, 8,...1
3basic_dhdit.data_hub__companies[72, 218, 48, 38, 233, 143, 9, 226, 204, 151, ...dit.data_hub__companies[25, 88, 248, 207, 122, 85, 22, 187, 41, 99, 4...1
4basic_dhdit.data_hub__companies[95, 15, 232, 74, 123, 239, 149, 20, 69, 201, ...dit.data_hub__companies[236, 26, 4, 121, 247, 204, 59, 212, 162, 32, ...1
\n", - "
" - ], - "text/plain": [ - " model left \\\n", - "0 basic_dh dit.data_hub__companies \n", - "1 basic_dh dit.data_hub__companies \n", - "2 basic_dh dit.data_hub__companies \n", - "3 basic_dh dit.data_hub__companies \n", - "4 basic_dh dit.data_hub__companies \n", - "\n", - " left_id right \\\n", - "0 [159, 88, 93, 114, 229, 226, 159, 80, 204, 168... dit.data_hub__companies \n", - "1 [5, 206, 209, 57, 155, 53, 4, 205, 147, 11, 13... dit.data_hub__companies \n", - "2 [85, 19, 108, 225, 134, 92, 105, 217, 244, 86,... dit.data_hub__companies \n", - "3 [72, 218, 48, 38, 233, 143, 9, 226, 204, 151, ... dit.data_hub__companies \n", - "4 [95, 15, 232, 74, 123, 239, 149, 20, 69, 201, ... dit.data_hub__companies \n", - "\n", - " right_id probability \n", - "0 [134, 155, 152, 206, 81, 64, 135, 99, 204, 197... 1 \n", - "1 [9, 74, 19, 18, 34, 88, 59, 107, 19, 210, 37, ... 1 \n", - "2 [231, 248, 107, 96, 178, 253, 194, 43, 216, 8,... 1 \n", - "3 [25, 88, 248, 207, 122, 85, 22, 187, 41, 99, 4... 1 \n", - "4 [236, 26, 4, 121, 247, 204, 59, 212, 162, 32, ... 1 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dh_deduped_df.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "1963d05e-4cb1-4e62-b1e6-3e0067c938eb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 [159, 88, 93, 114, 229, 226, 159, 80, 204, 168...\n", - "1 [5, 206, 209, 57, 155, 53, 4, 205, 147, 11, 13...\n", - "2 [85, 19, 108, 225, 134, 92, 105, 217, 244, 86,...\n", - "3 [72, 218, 48, 38, 233, 143, 9, 226, 204, 151, ...\n", - "4 [95, 15, 232, 74, 123, 239, 149, 20, 69, 201, ...\n", - " ... \n", - "2161 [170, 14, 152, 42, 218, 117, 226, 101, 119, 18...\n", - "2162 [117, 142, 93, 47, 102, 98, 70, 24, 135, 242, ...\n", - "2163 [26, 121, 21, 138, 127, 213, 138, 94, 227, 191...\n", - "2164 [101, 86, 133, 145, 94, 225, 224, 86, 213, 43,...\n", - "2165 [223, 178, 145, 11, 190, 234, 71, 40, 27, 80, ...\n", - "Name: left_id, Length: 2166, dtype: object" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dh_deduped.dataframe[\"left_id\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "157e2c5e-c326-4f2b-b7d9-3c4eed83850f", - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "setting an array element with a sequence", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[8], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdh_deduped\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdataframe\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mleft_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mbytes\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/generic.py:6637\u001b[0m, in \u001b[0;36mNDFrame.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 6631\u001b[0m results \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 6632\u001b[0m ser\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy, errors\u001b[38;5;241m=\u001b[39merrors) \u001b[38;5;28;01mfor\u001b[39;00m _, ser \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m 6633\u001b[0m ]\n\u001b[1;32m 6635\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 6636\u001b[0m \u001b[38;5;66;03m# else, only a single dtype is given\u001b[39;00m\n\u001b[0;32m-> 6637\u001b[0m new_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_mgr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6638\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_constructor_from_mgr(new_data, axes\u001b[38;5;241m=\u001b[39mnew_data\u001b[38;5;241m.\u001b[39maxes)\n\u001b[1;32m 6639\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\u001b[38;5;241m.\u001b[39m__finalize__(\u001b[38;5;28mself\u001b[39m, method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mastype\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:431\u001b[0m, in \u001b[0;36mBaseBlockManager.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 428\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m using_copy_on_write():\n\u001b[1;32m 429\u001b[0m copy \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m--> 431\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 432\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mastype\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 433\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 434\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 435\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 436\u001b[0m \u001b[43m \u001b[49m\u001b[43musing_cow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43musing_copy_on_write\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 437\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/managers.py:364\u001b[0m, in \u001b[0;36mBaseBlockManager.apply\u001b[0;34m(self, f, align_keys, **kwargs)\u001b[0m\n\u001b[1;32m 362\u001b[0m applied \u001b[38;5;241m=\u001b[39m b\u001b[38;5;241m.\u001b[39mapply(f, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 363\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 364\u001b[0m applied \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 365\u001b[0m result_blocks \u001b[38;5;241m=\u001b[39m extend_blocks(applied, result_blocks)\n\u001b[1;32m 367\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39mfrom_blocks(result_blocks, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes)\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/internals/blocks.py:758\u001b[0m, in \u001b[0;36mBlock.astype\u001b[0;34m(self, dtype, copy, errors, using_cow, squeeze)\u001b[0m\n\u001b[1;32m 755\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan not squeeze with more than one column.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 756\u001b[0m values \u001b[38;5;241m=\u001b[39m values[\u001b[38;5;241m0\u001b[39m, :] \u001b[38;5;66;03m# type: ignore[call-overload]\u001b[39;00m\n\u001b[0;32m--> 758\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array_safe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 760\u001b[0m new_values \u001b[38;5;241m=\u001b[39m maybe_coerce_values(new_values)\n\u001b[1;32m 762\u001b[0m refs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:237\u001b[0m, in \u001b[0;36mastype_array_safe\u001b[0;34m(values, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 234\u001b[0m dtype \u001b[38;5;241m=\u001b[39m dtype\u001b[38;5;241m.\u001b[39mnumpy_dtype\n\u001b[1;32m 236\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 237\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m):\n\u001b[1;32m 239\u001b[0m \u001b[38;5;66;03m# e.g. _astype_nansafe can fail on object-dtype of strings\u001b[39;00m\n\u001b[1;32m 240\u001b[0m \u001b[38;5;66;03m# trying to convert to float\u001b[39;00m\n\u001b[1;32m 241\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:182\u001b[0m, in \u001b[0;36mastype_array\u001b[0;34m(values, dtype, copy)\u001b[0m\n\u001b[1;32m 179\u001b[0m values \u001b[38;5;241m=\u001b[39m values\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[1;32m 181\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 182\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[43m_astype_nansafe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 184\u001b[0m \u001b[38;5;66;03m# in pandas we don't store numpy str dtypes, so convert to object\u001b[39;00m\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(dtype, np\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(values\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mtype, \u001b[38;5;28mstr\u001b[39m):\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:133\u001b[0m, in \u001b[0;36m_astype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 131\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copy \u001b[38;5;129;01mor\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m dtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m:\n\u001b[1;32m 132\u001b[0m \u001b[38;5;66;03m# Explicit copy, or required since NumPy can't view from / to object.\u001b[39;00m\n\u001b[0;32m--> 133\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43marr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n", - "\u001b[0;31mValueError\u001b[0m: setting an array element with a sequence" - ] - } - ], - "source": [ - "dh_deduped.dataframe[\"left_id\"].astype(bytes)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "e713dd80-5f45-4e57-9eaf-de31d13f7d00", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
left_idright_id
0b'\\x9fX]r\\xe5\\xe2\\x9fP\\xcc\\xa8\\xaaL~\\xa1\\\\\\xfc...b'\\x86\\x9b\\x98\\xceQ@\\x87c\\xcc\\xc5\\xcb]\\xfeA\\xf...
1b'\\x05\\xce\\xd19\\x9b5\\x04\\xcd\\x93\\x0b\\x89\\xe7^\\...b'\\tJ\\x13\\x12\"X;k\\x13\\xd2%\\x0cj\\x18\\xe6\\x9e\\x1...
2b'U\\x13l\\xe1\\x86\\\\i\\xd9\\xf4V\\x95\\x8d\\x8aB\\x1d\\...b'\\xe7\\xf8k`\\xb2\\xfd\\xc2+\\xd8\\x08\\xa0\\xb4\\xd2\\...
3b'H\\xda0&\\xe9\\x8f\\t\\xe2\\xcc\\x97\\x03C|bv\\x9b\\x0...b'\\x19X\\xf8\\xcfzU\\x16\\xbb)c(F\\x85\\x0e\\xf0AJ\\xf...
4b'_\\x0f\\xe8J{\\xef\\x95\\x14E\\xc9\\xa2\\x1e5;*>\\xd2...b'\\xec\\x1a\\x04y\\xf7\\xcc;\\xd4\\xa2 \\xdfH\\xa4\\xe3...
.........
2161b'\\xaa\\x0e\\x98*\\xdau\\xe2ew\\xb4\\x85S[\\xdfb\\xb1\\...b'\\xf1\\x8e@\\x86\\xc1\\xab\\xd1\\xda\\xe6\\x8c\\x80v\\x...
2162b'u\\x8e]/fbF\\x18\\x87\\xf2\\r\\x86\\xf8\\x95\\xdd\\xb8...b'\\x92\\x8c(\\xbd\\xbf\\x06\\xc4\\xcbJCu\\x17\\xe9\\x89...
2163b'\\x1ay\\x15\\x8a\\x7f\\xd5\\x8a^\\xe3\\xbf\\x1b\\x1d(\\...b'\\xb1\\x03\\x01\\x86\\x16\\x85\\x8dT/\\xe7}j\\xc4~q\\x...
2164b'eV\\x85\\x91^\\xe1\\xe0V\\xd5+\\xba\\xb0\\xd0L&\\xc4=...b'\\x0eEp\\x89\\x1d;\\xa2\\x97\\xd3} CRN\\xa6\\xed\\x8f...
2165b'\\xdf\\xb2\\x91\\x0b\\xbe\\xeaG(\\x1bP\\xccy\\x14\\xa1...b\"f\\x01j\\xd4\\xf0\\x83\\x8d\\xe5q'\\xcc\\x137\\xeb\\x1...
\n", - "

2166 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " left_id \\\n", - "0 b'\\x9fX]r\\xe5\\xe2\\x9fP\\xcc\\xa8\\xaaL~\\xa1\\\\\\xfc... \n", - "1 b'\\x05\\xce\\xd19\\x9b5\\x04\\xcd\\x93\\x0b\\x89\\xe7^\\... \n", - "2 b'U\\x13l\\xe1\\x86\\\\i\\xd9\\xf4V\\x95\\x8d\\x8aB\\x1d\\... \n", - "3 b'H\\xda0&\\xe9\\x8f\\t\\xe2\\xcc\\x97\\x03C|bv\\x9b\\x0... \n", - "4 b'_\\x0f\\xe8J{\\xef\\x95\\x14E\\xc9\\xa2\\x1e5;*>\\xd2... \n", - "... ... \n", - "2161 b'\\xaa\\x0e\\x98*\\xdau\\xe2ew\\xb4\\x85S[\\xdfb\\xb1\\... \n", - "2162 b'u\\x8e]/fbF\\x18\\x87\\xf2\\r\\x86\\xf8\\x95\\xdd\\xb8... \n", - "2163 b'\\x1ay\\x15\\x8a\\x7f\\xd5\\x8a^\\xe3\\xbf\\x1b\\x1d(\\... \n", - "2164 b'eV\\x85\\x91^\\xe1\\xe0V\\xd5+\\xba\\xb0\\xd0L&\\xc4=... \n", - "2165 b'\\xdf\\xb2\\x91\\x0b\\xbe\\xeaG(\\x1bP\\xccy\\x14\\xa1... \n", - "\n", - " right_id \n", - "0 b'\\x86\\x9b\\x98\\xceQ@\\x87c\\xcc\\xc5\\xcb]\\xfeA\\xf... \n", - "1 b'\\tJ\\x13\\x12\"X;k\\x13\\xd2%\\x0cj\\x18\\xe6\\x9e\\x1... \n", - "2 b'\\xe7\\xf8k`\\xb2\\xfd\\xc2+\\xd8\\x08\\xa0\\xb4\\xd2\\... \n", - "3 b'\\x19X\\xf8\\xcfzU\\x16\\xbb)c(F\\x85\\x0e\\xf0AJ\\xf... \n", - "4 b'\\xec\\x1a\\x04y\\xf7\\xcc;\\xd4\\xa2 \\xdfH\\xa4\\xe3... \n", - "... ... \n", - "2161 b'\\xf1\\x8e@\\x86\\xc1\\xab\\xd1\\xda\\xe6\\x8c\\x80v\\x... \n", - "2162 b'\\x92\\x8c(\\xbd\\xbf\\x06\\xc4\\xcbJCu\\x17\\xe9\\x89... \n", - "2163 b'\\xb1\\x03\\x01\\x86\\x16\\x85\\x8dT/\\xe7}j\\xc4~q\\x... \n", - "2164 b'\\x0eEp\\x89\\x1d;\\xa2\\x97\\xd3} CRN\\xa6\\xed\\x8f... \n", - "2165 b\"f\\x01j\\xd4\\xf0\\x83\\x8d\\xe5q'\\xcc\\x137\\xeb\\x1... \n", - "\n", - "[2166 rows x 2 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dh_deduped.dataframe.filter([\"left_id\", \"right_id\"]).map(bytes)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "c0c06b3c-f673-4bf8-a979-0264471ed9e3", - "metadata": {}, - "outputs": [], - "source": [ - "# Dedupe\n", - "dh_naive_deduper2 = make_deduper(\n", - " dedupe_run_name=\"basic_dh\",\n", - " description=\"\"\"\n", - " Clean company name, company number\n", - " \"\"\",\n", - " deduper=Naive,\n", - " deduper_settings={\n", - " \"id\": \"data_sha1\",\n", - " \"unique_fields\": [f\"{col_prefix}name\", f\"{col_prefix}company_number\"],\n", - " },\n", - " data_source=\"dit.data_hub__companies\",\n", - " data=dh_cleaned,\n", - ")\n", - "\n", - "dh_deduped2 = dh_naive_deduper2()\n", - "\n", - "dh_deduped_df2 = dh_deduped2.to_df()" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "18284c26-7d8f-40f8-a7eb-efb038f1f2f8", - "metadata": {}, - "outputs": [], - "source": [ - "from sqlalchemy.orm import Session\n", - "\n", - "from cmf.data import ENGINE, SourceData\n", - "\n", - "with Session(ENGINE) as session:\n", - " data_inner_join = session.query(SourceData).limit(10).all()" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "3150b437-e674-41d8-b057-56af3f04f987", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ]" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data_inner_join" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "90cfc337-388a-4bec-b50e-05d97406ca79", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "bytearray(b\"bytearray(b\\'\\\\x0c\\\\xa6*\\\\x8e\\\\x00:\\\\xd7\\\\xd9^\\\\x0fF\\\\x82\\\\xa7\\\\x89}\\\\xe6Fb\\\\x93\\\\x87\\')\")" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "\"bytearray(b'\\\\x0c\\\\xa6*\\\\x8e\\\\x00:\\\\xd7\\\\xd9^\\\\x0fF\\\\x82\\\\xa7\\\\x89}\\\\xe6Fb\\\\x93\\\\x87')\"" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 2166 entries, 0 to 2165\n", - "Data columns (total 3 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 left_id 2166 non-null object\n", - " 1 right_id 2166 non-null object\n", - " 2 probability 2166 non-null int32 \n", - "dtypes: int32(1), object(2)\n", - "memory usage: 42.4+ KB\n" - ] - } - ], - "source": [ - "bytearray(dh_deduped2.dataframe[\"left_id\"][0].encode())\n", - "dh_deduped2.dataframe[\"left_id\"][0]\n", - "dh_deduped2.dataframe.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "0d980667-5f9e-442b-ba69-4c68e713bebf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", - " ... \n", - "2161 \n", - "2162 \n", - "2163 \n", - "2164 \n", - "2165 \n", - "Name: left_id, Length: 2166, dtype: object" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dh_deduped2.dataframe[\"left_id\"].apply(type)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "a8c6881e-5620-4d92-9502-5953efe72d6a", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 2166 entries, 0 to 2165\n", - "Data columns (total 6 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 model 2166 non-null object\n", - " 1 left 2166 non-null object\n", - " 2 left_id 2166 non-null object\n", - " 3 right 2166 non-null object\n", - " 4 right_id 2166 non-null object\n", - " 5 probability 2166 non-null int32 \n", - "dtypes: int32(1), object(5)\n", - "memory usage: 93.2+ KB\n" - ] - } - ], - "source": [ - "dh_deduped_df.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "eb76f86d-2799-4d50-8a05-46bad0ab57d4", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/jovyan/company-matching/cmf/data/results.py:158: UserWarning: DataFrame columns are not unique, some columns will be omitted.\n", - " df.assign(\n" - ] - }, - { - "data": { - "text/plain": [ - "[{'model': 'basic_dh',\n", - " 'left': UUID('34f3e1b5-f612-e611-9bdc-e4115bead28a'),\n", - " 'right': UUID('6bd85b41-ebd2-43ad-99b9-399fca511176'),\n", - " 'probability': 1,\n", - " 'sha1': b']\\xff\\x1c``\\xad\\t:[\\x80\\x83\\xa6\\xc43x\\x0f!\\n\\xc7\\x8d'},\n", - " {'model': 'basic_dh',\n", - " 'left': UUID('34779711-2a85-4fea-b4e1-07226cc10425'),\n", - " 'right': UUID('6f3201cf-d483-4ce2-8c2c-c20e74a11f97'),\n", - " 'probability': 1,\n", - " 'sha1': b'5\\x1c*m&\\x96Y\\xda\\x0c\\xfd5\\xde\\xf9\\xf4\\x83\\t2N@)'},\n", - " {'model': 'basic_dh',\n", - " 'left': UUID('35519dfa-3c1a-4389-a452-141e7e84a289'),\n", - " 'right': UUID('0a83eefa-68b2-4852-b0fa-edf08828debf'),\n", - " 'probability': 1,\n", - " 'sha1': b'\\xc0\\xaf\\xe1\\x03\\xec\\xc9\\x1a\\x98\\x1d\\xba\\xaaV\\x88JIw\\xfbo\\x03\\xde'},\n", - " {'model': 'basic_dh',\n", - " 'left': UUID('35cb9542-1a51-4f32-b614-c5f77878a3f2'),\n", - " 'right': UUID('c3247c4f-4ee1-4500-a43c-61843964bc9e'),\n", - " 'probability': 1,\n", - " 'sha1': b'\\x05\\xf8\\xba\\xad\\xd7,\\xcaT\\xbdVY\\x04C\\x88a\\x9a\\xd83x\\x93'},\n", - " {'model': 'basic_dh',\n", - " 'left': UUID('3606e768-538b-e611-be23-e4115bead28a'),\n", - " 'right': UUID('7cf553b5-a098-e211-a939-e4115bead28a'),\n", - " 'probability': 1,\n", - " 'sha1': b'\\xfc\\xb4\\x9e\\x10\\xa1J5x\\xae\\xd6\\x98\\xac\\xce\\xac\\xbb\\xe8D\\xee\\x01\\x9c'}]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dh_deduped._prep_to_cmf(dh_deduped_df)[:5]" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/models/WL_deterministic-tests.ipynb b/notebooks/models/WL_deterministic-tests.ipynb deleted file mode 100644 index 1b42ffc..0000000 --- a/notebooks/models/WL_deterministic-tests.ipynb +++ /dev/null @@ -1,721 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "b239cd5d-5c6b-4370-9e9f-662ffae4d58f", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame, display\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "markdown", - "id": "865b59b8-fc8e-4402-97c5-1192dba6fd42", - "metadata": {}, - "source": [ - "# Deterministic linker\n", - "\n", - "A place to fix and test the deterministic linker." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "bb0f2cfd-4aa9-483c-99fa-49152aefaad0", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "DEBUG:git.cmd:Popen(['git', 'version'], cwd=/home/jovyan/company-matching/notebooks, universal_newlines=False, shell=None, istream=None)\n", - "DEBUG:git.cmd:Popen(['git', 'version'], cwd=/home/jovyan/company-matching/notebooks, universal_newlines=False, shell=None, istream=None)\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from cmf import locations as loc\n", - "from cmf.data import utils as du\n", - "from cmf.data.star import Star\n", - "from cmf.data.datasets import Dataset\n", - "from cmf.data.probabilities import Probabilities\n", - "from cmf.data.clusters import Clusters\n", - "from cmf.data.validation import Validation\n", - "from cmf.link.deterministic_linker import DeterministicLinker\n", - "from cmf.features.clean_complex import duckdb_cleaning_factory\n", - "from cmf.features.clean_basic_original import (\n", - " cms_original_clean_company_name_general,\n", - " cms_original_clean_company_name_ch,\n", - " cms_original_clean_postcode,\n", - " cms_original_clean_email,\n", - " cms_original_clean_ch_id,\n", - " cms_original_clean_cdms_id\n", - ")\n", - "\n", - "from dotenv import load_dotenv, find_dotenv\n", - "import os\n", - "import duckdb\n", - "from pathlib import Path\n", - "\n", - "dotenv_path = find_dotenv()\n", - "load_dotenv(dotenv_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "a47bb693-085a-431e-a14b-ab7acf55f773", - "metadata": {}, - "outputs": [], - "source": [ - "star = Star(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"STAR_TABLE\")\n", - ")\n", - "probabilities = Probabilities(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"PROBABILITIES_TABLE\"),\n", - " star = star\n", - ")\n", - "clusters = Clusters(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"CLUSTERS_TABLE\"),\n", - " star = star\n", - ")\n", - "validation = Validation(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"VALIDATE_TABLE\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "f11719a3-9023-4683-8664-542988bd81b3", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp = DeterministicLinker(\n", - " name=\"n1_deterministic_basic\",\n", - " dataset = Dataset(\n", - " star_id=54717,\n", - " star=star\n", - " ), \n", - " probabilities=probabilities, \n", - " clusters=clusters, \n", - " n=1,\n", - " overwrite=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "5c5aa81e-db8a-4b4b-806b-e5f3f0b3a5d7", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp.get_data(\n", - " # sample=5,\n", - " cluster_select={\n", - " '\"companieshouse\".\"companies\"': [\n", - " \"company_name as company_name\",\n", - " \"postcode as postcode\"\n", - " ]\n", - " },\n", - " dim_select=[\n", - " \"id\",\n", - " \"company_name\",\n", - " \"postcode\"\n", - " ],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "4e60f7b1-b6f0-4b1d-8610-b9ee861decf5", - "metadata": {}, - "outputs": [], - "source": [ - "clean_postcode = duckdb_cleaning_factory(cms_original_clean_postcode)\n", - "clean_ch_name = duckdb_cleaning_factory(cms_original_clean_company_name_ch)\n", - "clean_gen_name = duckdb_cleaning_factory(cms_original_clean_company_name_general)\n", - "\n", - "cluster_pipeline={\n", - " \"clean_ch_comp_names\": {\n", - " \"function\": clean_ch_name,\n", - " \"arguments\": {\n", - " \"column\": \"company_name\"\n", - " },\n", - " },\n", - " \"clean_postcode\": {\n", - " \"function\": clean_postcode,\n", - " \"arguments\": {\n", - " \"column\": \"postcode\"\n", - " },\n", - " }\n", - "}\n", - "dim_pipeline={\n", - " \"clean__comp_names\": {\n", - " \"function\": clean_gen_name,\n", - " \"arguments\": {\n", - " \"column\": \"company_name\"\n", - " },\n", - " },\n", - " \"clean_postcode\": {\n", - " \"function\": clean_postcode,\n", - " \"arguments\": {\n", - " \"column\": \"postcode\"\n", - " },\n", - " }\n", - "}\n", - "link_settings={\n", - " \"company_name\": {\n", - " \"cluster\": \"company_name\",\n", - " \"dimension\": \"company_name\"\n", - " },\n", - " \"postcode\": {\n", - " \"cluster\": \"postcode\",\n", - " \"dimension\": \"postcode\"\n", - " }\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "2e0df3e0-28fe-439f-bc09-64c7785234ab", - "metadata": {}, - "source": [ - "## Full evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "cbdd3f7c-dda4-4e0b-8bbe-2dbcac89755b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:src.link.linker:Running pipeline\n", - "INFO:src.link.linker:Logging outputs to the Probabilities table\n", - "INFO:src.link.linker:Logging as MLflow experiment\n", - "DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): mlflow--data-science.data.trade.gov.uk:8004\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"GET /api/2.0/mlflow/experiments/get-by-name?experiment_name=cm_hmrc-trade-exporters HTTP/1.1\" 200 245\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"GET /api/2.0/mlflow/experiments/get-by-name?experiment_name=cm_hmrc-trade-exporters HTTP/1.1\" 200 245\n", - "DEBUG:git.util:Failed checking if running in CYGWIN due to: FileNotFoundError(2, 'No such file or directory')\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/create HTTP/1.1\" 200 1095\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/log-batch HTTP/1.1\" 200 2\n", - "INFO:src.link.linker:Running prepare() function\n", - "INFO:src.link.linker:Running link() function\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/log-metric HTTP/1.1\" 200 2\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/update HTTP/1.1\" 200 433\n", - "INFO:src.link.linker:Writing parameters to /home/jovyan/company-matching/scratch/reports/cm_hmrc-trade-exporters/deterministic\n", - "INFO:src.link.linker:Writing metrics to /home/jovyan/company-matching/scratch/reports/cm_hmrc-trade-exporters/deterministic\n", - "INFO:src.link.linker:Writing artefacts to /home/jovyan/company-matching/scratch/reports/cm_hmrc-trade-exporters/deterministic\n", - "INFO:src.link.linker:Done!\n" - ] - } - ], - "source": [ - "cl_x_exp.evaluate(\n", - " link_experiment=\"cm_hmrc-trade-exporters\",\n", - " evaluation_description=\"\"\"\n", - " - Deterministic name/postcode\n", - " - Cleaned name as per existing CMS rules\n", - " \"\"\",\n", - " prepare_kwargs={\n", - " \"cluster_pipeline\": cluster_pipeline,\n", - " \"dim_pipeline\": dim_pipeline,\n", - " \"link_settings\": link_settings\n", - " },\n", - " link_kwargs={},\n", - " report_dir=Path(\n", - " loc.PROJECT_DIR, \n", - " 'scratch', \n", - " 'reports', \n", - " 'cm_hmrc-trade-exporters',\n", - " 'deterministic'\n", - " ),\n", - " log_mlflow=True,\n", - " log_output=True,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "3dd4326c-e40d-4a96-bc07-aa376c4c3d74", - "metadata": {}, - "source": [ - "## Prepare data" - ] - }, - { - "cell_type": "code", - "execution_count": 148, - "id": "a156528b-349e-405a-82ea-b78dec6f8c7e", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp.prepare(\n", - " cluster_pipeline=cluster_pipeline,\n", - " dim_pipeline=dim_pipeline,\n", - " link_settings=link_settings\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 149, - "id": "956ab425-a7de-4be2-988b-32240a94f81c", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namepostcode
011891941stcalllockkeyshopb302bt
134906151stcallmobilitylu55xf
223032591stcallmobilitylu55xf
35717261stcallmobilitycm195ar
43437001stcarimportsdn91hs
\n", - "
" - ], - "text/plain": [ - " id company_name postcode\n", - "0 1189194 1stcalllockkeyshop b302bt\n", - "1 3490615 1stcallmobility lu55xf\n", - "2 2303259 1stcallmobility lu55xf\n", - "3 571726 1stcallmobility cm195ar\n", - "4 343700 1stcarimports dn91hs" - ] - }, - "execution_count": 149, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namepostcode
0e4607fd5-11d2-4746-b850-480808029c4fitresidente20lt
13c4498ec-6525-405e-b057-c0ab7182268dklinelngshippingukec2v7bp
2b96f4500-7b62-4fcb-8df4-edb978a80632mnapropertydundeedd54ra
30b305d2d-dc85-462a-a860-a5b95bfce4efphoenixmanagementen48re
4f51d7369-2fa3-4789-993e-647c3eb80c24step13transportb170nl
\n", - "
" - ], - "text/plain": [ - " id company_name postcode\n", - "0 e4607fd5-11d2-4746-b850-480808029c4f itresident e20lt\n", - "1 3c4498ec-6525-405e-b057-c0ab7182268d klinelngshippinguk ec2v7bp\n", - "2 b96f4500-7b62-4fcb-8df4-edb978a80632 mnapropertydundee dd54ra\n", - "3 0b305d2d-dc85-462a-a860-a5b95bfce4ef phoenixmanagement en48re\n", - "4 f51d7369-2fa3-4789-993e-647c3eb80c24 step13transport b170nl" - ] - }, - "execution_count": 149, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cl_x_exp.dim_processed.head(5)\n", - "cl_x_exp.cluster_processed.head(5)" - ] - }, - { - "cell_type": "markdown", - "id": "8be87bdf-8b75-4811-b01a-5976b413e8ad", - "metadata": {}, - "source": [ - "## Link data" - ] - }, - { - "cell_type": "code", - "execution_count": 150, - "id": "fe859234-36e5-4f37-be56-c532de97a87e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
clusteridsourceprobabilityuuidlink_typemodel
01d7f4b26-189a-4a2b-9b8b-489baeffabfb227681954717175ea98b2-c13e-479f-a9f1-817f48956ba3linkn1_deterministic_basic
1690b86e1-9012-49da-99a8-ecd08a1a7e10228643854717123af35aa-fb04-4576-9458-455061950618linkn1_deterministic_basic
24a64ee28-e3dc-4128-9642-24a22f678495872651547171622522dc-e2c3-412b-8e7f-0d8f705d0599linkn1_deterministic_basic
3272d50a7-5304-4d2f-b6aa-f52549e940e13517005547171b626b7d1-a402-4b89-846e-b3abf9cc954blinkn1_deterministic_basic
4b3082628-24c5-4f1f-a0bb-95bc2036127127193905471716e32b13a-301c-4ddb-8ce8-8bfd4db04ebblinkn1_deterministic_basic
........................
197816d2e0c-e25e-4576-a36e-6771dcd634753059580547171a4f2ed80-a0c7-4ddb-85bc-8532c773cb55linkn1_deterministic_basic
198b9558b7c-cb05-424b-b2a7-32df83ff0415596995471712cedbbc6-fc4f-410e-8940-14e22900c57blinkn1_deterministic_basic
1998cdfc90d-ff6b-4281-8dd7-b601f137f991320574054717113be15b6-efe4-414b-a47a-6d54b0973e8blinkn1_deterministic_basic
2006ab6d3ec-dbc0-4083-b1c3-c84eeb9e6f7d24238545471712640b8fb-eb02-499b-aacd-233547a14fcelinkn1_deterministic_basic
2012eaa66ac-a3e3-4721-a7dc-905df7ed09b9337510254717191a236d6-294a-4e80-bc9f-3c8944b06e1blinkn1_deterministic_basic
\n", - "

202 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " cluster id source probability \\\n", - "0 1d7f4b26-189a-4a2b-9b8b-489baeffabfb 2276819 54717 1 \n", - "1 690b86e1-9012-49da-99a8-ecd08a1a7e10 2286438 54717 1 \n", - "2 4a64ee28-e3dc-4128-9642-24a22f678495 872651 54717 1 \n", - "3 272d50a7-5304-4d2f-b6aa-f52549e940e1 3517005 54717 1 \n", - "4 b3082628-24c5-4f1f-a0bb-95bc20361271 2719390 54717 1 \n", - ".. ... ... ... ... \n", - "197 816d2e0c-e25e-4576-a36e-6771dcd63475 3059580 54717 1 \n", - "198 b9558b7c-cb05-424b-b2a7-32df83ff0415 59699 54717 1 \n", - "199 8cdfc90d-ff6b-4281-8dd7-b601f137f991 3205740 54717 1 \n", - "200 6ab6d3ec-dbc0-4083-b1c3-c84eeb9e6f7d 2423854 54717 1 \n", - "201 2eaa66ac-a3e3-4721-a7dc-905df7ed09b9 3375102 54717 1 \n", - "\n", - " uuid link_type model \n", - "0 75ea98b2-c13e-479f-a9f1-817f48956ba3 link n1_deterministic_basic \n", - "1 23af35aa-fb04-4576-9458-455061950618 link n1_deterministic_basic \n", - "2 622522dc-e2c3-412b-8e7f-0d8f705d0599 link n1_deterministic_basic \n", - "3 b626b7d1-a402-4b89-846e-b3abf9cc954b link n1_deterministic_basic \n", - "4 6e32b13a-301c-4ddb-8ce8-8bfd4db04ebb link n1_deterministic_basic \n", - ".. ... ... ... \n", - "197 a4f2ed80-a0c7-4ddb-85bc-8532c773cb55 link n1_deterministic_basic \n", - "198 2cedbbc6-fc4f-410e-8940-14e22900c57b link n1_deterministic_basic \n", - "199 13be15b6-efe4-414b-a47a-6d54b0973e8b link n1_deterministic_basic \n", - "200 2640b8fb-eb02-499b-aacd-233547a14fce link n1_deterministic_basic \n", - "201 91a236d6-294a-4e80-bc9f-3c8944b06e1b link n1_deterministic_basic \n", - "\n", - "[202 rows x 7 columns]" - ] - }, - "execution_count": 150, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cl_x_exp.link(\n", - " log_output=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 151, - "id": "2ecebc94-8c93-42a8-b68a-9cc631b9d363", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "exp_n2_splink_basic 164269\n", - "n1_deterministic_basic 202\n", - "Name: model, dtype: int64" - ] - }, - "execution_count": 151, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x = probabilities.read()\n", - "x.model.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 170, - "id": "f5736ce3-9f8f-4721-a347-9ad738ef06cd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2" - ] - }, - "execution_count": 170, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x.model.nunique()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/models/WL_existing-service.ipynb b/notebooks/models/WL_existing-service.ipynb deleted file mode 100644 index bbea4ab..0000000 --- a/notebooks/models/WL_existing-service.ipynb +++ /dev/null @@ -1,2014 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "0b389e80-89a0-4544-b508-b0b07ee9070c", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "markdown", - "id": "f6c04d59-c6d9-4a28-9fe1-60d106239b9f", - "metadata": {}, - "source": [ - "# Existing company matching service\n", - "\n", - "I needed to be able to evaluate against predictions made by the current company matching service that:\n", - "\n", - "* Only made one match per record\n", - "* Only joined the most likely match for a record\n", - "* Had no bias to which table was being joined onto the other\n", - "* Reflected the \"truest\" belief of the service (using postcode)\n", - "\n", - "What follows is an EDA done in SQL, which I'm pulling over with very few checks just so the code doesn't get lost.\n", - "\n", - "I'm focusing on [Companies House company data](https://data.trade.gov.uk/datasets/a777d199-53a4-4d0a-bbbb-1559a86f8c4c#companies-house-company-data) and [UK exporters](https://data.trade.gov.uk/datasets/76fb2db3-ab32-4af8-ae87-d41d36b31265#uk-exporters).\n", - "\n", - "`make dims` had been run to produce the dimension tables." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "9e8ea063-717f-46a2-aa55-a8caab5bbd26", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from cmf.data import utils as du\n", - "from dotenv import load_dotenv, find_dotenv\n", - "import os\n", - "\n", - "dotenv_path = find_dotenv()\n", - "load_dotenv(dotenv_path)" - ] - }, - { - "cell_type": "markdown", - "id": "1e3727a5-28da-46ae-a149-85e1afdec105", - "metadata": {}, - "source": [ - "## Dim table sizes" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "4bc10f51-6107-4975-9f6e-944e1112c8de", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
0254243
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 254243" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.\"hmrc_trade__exporters__dim\";\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "6366df93-7433-4f41-a079-0e0f2645a6d4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
05381225
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 5381225" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " \"companieshouse\".\"companies\";\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "28fbf12d-867c-48ee-9ae7-6a629f00b7c4", - "metadata": {}, - "source": [ - "## Lead Gen Experiments match method" - ] - }, - { - "cell_type": "markdown", - "id": "76c1f26a-c0b3-46f6-871e-82bad88f29b4", - "metadata": {}, - "source": [ - "### Left: companies house, right: exporters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "99cf6dd5-f72e-44cb-8fcb-1fa81aa2df1d", - "metadata": {}, - "outputs": [], - "source": [ - "du.query_nonreturn(f\"\"\"\n", - " drop table {os.getenv(\"SCHEMA\")}.test_match_lr;\n", - " create table {os.getenv(\"SCHEMA\")}.test_match_lr as\n", - " select distinct on (w_match.id, c_match.match_id)\n", - " w_match.id as export_id,\n", - " w_match.match_id as export_match_id,\n", - " w_match.similarity as export_match_similarity,\n", - " -- Ignore postcode, sum similarity\n", - " (select sum(co::int) from unnest(regexp_split_to_array(left(w_match.similarity, 6), '')) as co) as match_sum_similarity,\n", - " c_match.id as crn,\n", - " c_match.match_id as company_match_id,\n", - " c_match.similarity as company_match_similarity,\n", - " -- Ignore postcode, sum similarity\n", - " (select sum(co::int) from unnest(regexp_split_to_array(left(c_match.similarity, 6), '')) as co) as ch_sum_similarity\n", - " from\n", - " companieshouse.companies__match_ids w_match\n", - " left join \n", - " hmrc.trade__exporters__match_ids c_match on\n", - " w_match.match_id = c_match.match_id\n", - " order by\n", - " -- Order by similarity, take the top (see select statement)\n", - " w_match.id, \n", - " c_match.match_id,\n", - " (select sum(co::int) from unnest(regexp_split_to_array(left(c_match.similarity, 6), '')) as co) desc;\n", - " \"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "a8e32a0b-778b-4803-87eb-a0196bd188cf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
05336353
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 5336353" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.test_match_lr;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "2d166b6e-7644-4ef7-82fe-5b3df9fe7286", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
069146
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 69146" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.test_match_lr m\n", - " inner join\n", - " {os.getenv(\"SCHEMA\")}.\"hmrc_trade__exporters__dim\" d on\n", - " d.id::text = m.crn;\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "8b8bb671-8bb9-4831-ba92-dae947dd2658", - "metadata": {}, - "source": [ - "(of 254243)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "a458b686-5ca9-4525-84a9-59d86939afc9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
05157812
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 5157812" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.test_match_lr m\n", - " inner join\n", - " \"companieshouse\".\"companies\" d on\n", - " d.id = m.export_id;\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "c3eae059-bef1-43d5-ac4b-4a3e71f25027", - "metadata": {}, - "source": [ - "(of 5359637)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "e6b723a8-40be-485a-8feb-a6d37c8f05be", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
068500
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 68500" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.test_match_lr lkp\n", - " left join\n", - " {os.getenv(\"SCHEMA\")}.\"hmrc_trade__exporters__dim\" l on\n", - " l.id::text = lkp.crn\n", - " left join\n", - " \"companieshouse\".\"companies\" r on\n", - " r.id = lkp.export_id\n", - " where\n", - " l.id is not null and r.id is not null\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "62a96e77-1c76-430a-82d6-c76639adf882", - "metadata": {}, - "source": [ - "### Left: exporters, right: companies house" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "907f8af9-6152-4a41-b60e-88d7f2f0986f", - "metadata": {}, - "outputs": [], - "source": [ - "du.query_nonreturn(f\"\"\"\n", - " drop table {os.getenv(\"SCHEMA\")}.test_match_rl;\n", - " create table {os.getenv(\"SCHEMA\")}.test_match_rl as\n", - " select distinct on (w_match.id, c_match.match_id)\n", - " w_match.id as export_id,\n", - " w_match.match_id as export_match_id,\n", - " w_match.similarity as export_match_similarity,\n", - " -- Ignore postcode, sum similarity\n", - " (select sum(co::int) from unnest(regexp_split_to_array(left(w_match.similarity, 6), '')) as co) as match_sum_similarity,\n", - " c_match.id as crn,\n", - " c_match.match_id as company_match_id,\n", - " c_match.similarity as company_match_similarity,\n", - " -- Ignore postcode, sum similarity\n", - " (select sum(co::int) from unnest(regexp_split_to_array(left(c_match.similarity, 6), '')) as co) as ch_sum_similarity\n", - " from\n", - " hmrc.trade__exporters__match_ids w_match\n", - " left join \n", - " companieshouse.companies__match_ids c_match on\n", - " w_match.match_id = c_match.match_id\n", - " order by\n", - " -- Order by similarity, take the top (see select statement)\n", - " w_match.id, \n", - " c_match.match_id,\n", - " (select sum(co::int) from unnest(regexp_split_to_array(left(c_match.similarity, 6), '')) as co) desc;\n", - " \"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "c65bab05-03e3-4808-b37f-19e9c29bf33f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
03418561
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 3418561" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.test_match_rl;\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "c74683e3-c194-4f8a-9825-f7c0e4aac85a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
0254243
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 254243" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.test_match_rl m\n", - " inner join\n", - " {os.getenv(\"SCHEMA\")}.\"hmrc_trade__exporters__dim\" d on\n", - " d.id::text = m.export_id;\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "bcff3258-f239-45a7-9795-d1e936a25a6f", - "metadata": {}, - "source": [ - "(of 254243)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "97658372-6210-42ff-9803-2c437bb22fe3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
03273969
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 3273969" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.test_match_rl m\n", - " inner join\n", - " \"companieshouse\".\"companies\" d on\n", - " d.id::text = m.crn;\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "58e560a9-6ad9-4074-83a5-cd17c5b219c6", - "metadata": {}, - "source": [ - "(of 5359637)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "28a5fe55-91a6-4870-b42e-122aa2872987", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
0235820
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 235820" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.test_match_rl lkp\n", - " left join\n", - " {os.getenv(\"SCHEMA\")}.\"hmrc_trade__exporters__dim\" l on\n", - " l.id::text = lkp.export_id\n", - " left join\n", - " \"companieshouse\".\"companies\" r on\n", - " r.id = lkp.crn\n", - " where\n", - " l.id is not null and r.id is not null\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "6d90236b-c7ca-4cea-bbc8-a2312a704b8c", - "metadata": {}, - "source": [ - "## Why?\n", - "\n", - "The algo gets to the top match on the right given what's on the left needs matching.\n", - "\n", - "If all your exporters need matching, it can match them all -- even if some weren't its top pick.\n", - "\n", - "If all your companies need matching, it can match them all -- even if some weren't its top pick.\n", - "\n", - "Our method wants to _succeed_, not _evaluate_.\n", - "\n", - "Recommend:\n", - "\n", - "1. Write a new algorithm that isn't opinionated\n", - "2. OR choose the (flawed) one with CH on the left and leave this as something to iterate" - ] - }, - { - "cell_type": "markdown", - "id": "b33c98b5-e0d4-4072-9307-ad36f9e4b240", - "metadata": {}, - "source": [ - "## New method\n", - "\n", - "* For two dim tables\n", - "* Connect company matching match tables, including things that weren't matched (full join)\n", - "* Connect in the two dim tables and show where we've successfully connected (because company matching matches FACT tables)\n", - "* Only one row is allowed per cluster. We prefer:\n", - " * The highest score\n", - " * Exists in one of the two dim tables\n", - "* If company matching scored a match not in our dim table highest, we drop it\n", - " * It shouldn't do -- at worst it'll be tied for top match" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "45a93960-5fb2-475d-bae5-d55a30226c36", - "metadata": {}, - "outputs": [], - "source": [ - "du.query_nonreturn(f\"\"\"\n", - " drop table if exists {os.getenv(\"SCHEMA\")}.ch_x_exp_eval;\n", - " create table {os.getenv(\"SCHEMA\")}.ch_x_exp_eval as (\n", - " select distinct on (cluster)\n", - " cluster,\n", - " score,\n", - " l_id,\n", - " l_hit,\n", - " r_id,\n", - " r_hit\n", - " from (\n", - " select\n", - " l_lkp.id as l_id,\n", - " case \n", - " when l.id is not null\n", - " then true\n", - " else false\n", - " end as l_hit,\n", - " r_lkp.id as r_id,\n", - " case \n", - " when r.id is not null\n", - " then true\n", - " else false\n", - " end as r_hit,\n", - " l_lkp.match_id as cluster,\n", - " coalesce(\n", - " (\n", - " char_length(replace(l_lkp.similarity, '0', ''))\n", - " +\n", - " char_length(replace(r_lkp.similarity, '0', ''))\n", - " ),\n", - " 0\n", - " ) as score\n", - " from\n", - " \"hmrc\".\"trade__exporters__match_ids\" l_lkp\n", - " full join\n", - " companieshouse.companies__match_ids r_lkp on\n", - " l_lkp.match_id = r_lkp.match_id\n", - " left join\n", - " _user_eaf4fd9a.\"hmrc_trade__exporters__dim\" l on\n", - " l.id::text = l_lkp.id\n", - " left join\n", - " \"companieshouse\".\"companies\" r on\n", - " r.id = r_lkp.id\t\n", - " ) raw_matches\n", - " order by\n", - " cluster desc,\n", - " score desc,\n", - " l_hit desc,\n", - " r_hit desc\n", - " );\n", - " \"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "0b35623b-59cb-4685-96cf-52e84df829cd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
0188154
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 188154" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# all company matching entries from dim tables\n", - "\n", - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.ch_x_exp_eval\n", - " where\n", - " l_hit = true or r_hit = true\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "5d3e769a-6fba-4ec6-b45a-4478574ce680", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
0188154
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 188154" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# all company matching entries from export dim table \n", - "\n", - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.ch_x_exp_eval\n", - " where\n", - " l_hit = true\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "b8eb36fc-36be-42ff-a1e0-ddd038e632a1", - "metadata": {}, - "source": [ - "(of 254243)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "c3e75e75-5b26-4bd8-98ba-5ed5673f6237", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
0175468
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 175468" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# all company matching entries from company dim table\n", - "\n", - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.ch_x_exp_eval\n", - " where\n", - " r_hit = true\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "ec16ca86-e028-4ded-996d-c3c951a9cffc", - "metadata": {}, - "source": [ - "(of 5359637)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "420de95c-c96a-4098-a80f-ad272c9769f5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
0175468
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 175468" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.ch_x_exp_eval\n", - " where\n", - " l_hit = true and r_hit = true\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "d01da233-553a-40a0-974a-e43a272c50ff", - "metadata": {}, - "source": [ - "## How do the approaches differ?\n", - "\n", - "I set up three evaluation tables to compare and contrast.\n", - "\n", - "* v1 is original method, CH on the left\n", - "* v2 is original method, exporters on the left\n", - "* v3 is the new method" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "36799a40-28d7-4045-a991-64be3eeebbd5", - "metadata": {}, - "outputs": [], - "source": [ - "du.query_nonreturn(f\"\"\"\n", - " drop table if exists {os.getenv(\"SCHEMA\")}.match_test_v1;\n", - " create table {os.getenv(\"SCHEMA\")}.match_test_v1 as (\n", - " select\n", - " export_id as crn,\n", - " crn as export_id,\n", - " match_sum_similarity,\n", - " ch_sum_similarity,\n", - " lkp.export_match_id as cluster,\n", - " r.company_name as ch_name,\n", - " r.postcode as ch_pc,\n", - " l.company_name as exp_name,\n", - " l.postcode as exp_pc\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.test_match_lr lkp\n", - " left join\n", - " {os.getenv(\"SCHEMA\")}.\"hmrc_trade__exporters__dim\" l on\n", - " l.id::text = lkp.crn\n", - " left join\n", - " \"companieshouse\".\"companies\" r on\n", - " r.id = lkp.export_id\n", - " where\n", - " l.id is not null \n", - " and r.id is not null\n", - " );\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "04373a34-f7b0-404c-a6bd-9d481aef0de8", - "metadata": {}, - "outputs": [], - "source": [ - "du.query_nonreturn(f\"\"\"\n", - " drop table if exists {os.getenv(\"SCHEMA\")}.match_test_v2;\n", - " create table {os.getenv(\"SCHEMA\")}.match_test_v2 as (\n", - " select\n", - " export_id,\n", - " crn,\n", - " match_sum_similarity,\n", - " ch_sum_similarity,\n", - " lkp.export_match_id as cluster,\n", - " r.company_name as ch_name,\n", - " r.postcode as ch_pc,\n", - " l.company_name as exp_name,\n", - " l.postcode as exp_pc\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.test_match_rl lkp\n", - " left join\n", - " {os.getenv(\"SCHEMA\")}.\"hmrc_trade__exporters__dim\" l on\n", - " l.id::text = lkp.export_id\n", - " left join\n", - " \"companieshouse\".\"companies\" r on\n", - " r.id = lkp.crn\n", - " where\n", - " l.id is not null \n", - " and r.id is not null\n", - " );\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d772b69f-93d9-48de-806e-320515de0222", - "metadata": {}, - "outputs": [], - "source": [ - "du.query_nonreturn(f\"\"\"\n", - " drop table if exists {os.getenv(\"SCHEMA\")}.match_test_v3;\n", - " create table {os.getenv(\"SCHEMA\")}.match_test_v3 as (\n", - " select\n", - " lkp.*,\n", - " r.company_name as ch_name,\n", - " r.postcode as ch_pc,\n", - " l.company_name as exp_name,\n", - " l.postcode as exp_pc\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.ch_x_exp_eval lkp\n", - " left join\n", - " {os.getenv(\"SCHEMA\")}.\"hmrc_trade__exporters__dim\" l on\n", - " l.id::text = lkp.l_id\n", - " left join\n", - " \"companieshouse\".\"companies\" r on\n", - " r.id = lkp.r_id\t\n", - " where\n", - " l_hit = true \n", - " and r_hit = true\n", - " );\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "abf51672-e9a8-4e4c-bbbd-39b859c877b5", - "metadata": {}, - "source": [ - "### Agree" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "0003a9cc-5537-446d-b593-b0e5e00e5302", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
064075
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 64075" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select \n", - " count(*)\n", - " from \n", - " {os.getenv(\"SCHEMA\")}.match_test_v1 v1\n", - " inner join\n", - " {os.getenv(\"SCHEMA\")}.match_test_v2 v2 on\n", - " v1.export_id = v2.export_id\n", - " and v1.crn = v2.crn\n", - " inner join\n", - " {os.getenv(\"SCHEMA\")}.match_test_v3 v3 on\n", - " v1.export_id = v3.l_id\n", - " and v1.crn = v3.r_id;\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "a51c5a04-5bf1-4d16-82c8-0279eb7db59d", - "metadata": {}, - "source": [ - "### Who does it better? v3 vs v1" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "51cfd745-7a83-4aac-ad55-dd60494dae40", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
0111393
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 111393" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# disagree: in v3, not v1\n", - "\n", - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " --*\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v3 v3\n", - " where not exists (\n", - " select\n", - " export_id,\n", - " crn\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v1 v1\n", - " where\n", - " v1.crn = v3.r_id\n", - " and v1.export_id = v3.l_id\n", - " );\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "939ad265-9bc6-4f8c-9e9a-c3fd9dc8a5f6", - "metadata": {}, - "source": [ - "Does this accurately represent the belief of the matching service?\n", - "\n", - "* e\"2256473\" to ch\"03042765\" via 3369780 -- appropriate. Two postcode-only exp matches, either as likely as the other. Including PC is the diff\n", - "* e\"2407592\" to ch\"11911888\" via 1013097 -- appropriate. Two equal matches, either as likely as the other. Order probably the diff\n", - "* e\"2645274\" to ch\"01660807\" via 2344457 -- appropriate. Two postcode-only exp matches, either as likely as the other. Including PC is the diff" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "a2cf2199-9e65-4492-a2d4-272a33bd8273", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
04645
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 4645" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# disagree: in v1, not v3\n", - "\n", - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " --*\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v1 v1\n", - " where not exists (\n", - " select\n", - " l_id,\n", - " r_id\n", - " from\n", - "\n", - " {os.getenv(\"SCHEMA\")}.match_test_v3 v3\n", - " where\n", - " v1.crn = v3.r_id\n", - " and v1.export_id = v3.l_id\n", - " )\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "3dcb82ef-7e4f-431e-8a98-4024016e0da6", - "metadata": {}, - "source": [ - "Does this accurately represent the belief of the matching service?\n", - "\n", - "* e\"2925935\" to ch\"03512796\" via 8133 -- Two equal matches, either as likely as the other. Order probably the diff\n", - "* e\"11630588\" to ch\"387111\" via 3092652 -- Two postcode-only exp matches, either as likely as the other. Including PC is the diff\n", - "\n", - "What about CLUSTERS not being matched? That might prove a difference" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "afa0db6a-b464-4fc7-9ae1-216e7aa2213f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
0322
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 322" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " --*\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v1 v1\n", - " where not exists (\n", - " select\n", - " v3.cluster\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v3 v3\n", - " where\n", - " v1.cluster = v3.cluster\n", - " );\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "9a9a4245-3f2a-4f02-8877-7dabb32c214b", - "metadata": {}, - "source": [ - "322, all bad matches\n", - "\n", - "1564656 doesn't exist in HMRC exporters match ids. How did this happen?" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "f63735c1-c72b-4ce5-8921-2f8d09503080", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
0107070
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 107070" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " --*\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v3 v3\n", - " where not exists (\n", - " select\n", - " v1.cluster\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v1 v1\n", - " where\n", - " v1.cluster = v3.cluster\n", - " );\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "2d016bac-b0bc-43fa-b790-237f877d89fd", - "metadata": {}, - "source": [ - "100k rows, mostly looking solid\n", - "\n", - "**🏆 V3 WINS**" - ] - }, - { - "cell_type": "markdown", - "id": "b8615013-44ae-43e2-968e-ec92710a7b39", - "metadata": {}, - "source": [ - "### Who does it better? v3 vs v2" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "da1446fc-bbe1-4d83-9aef-a5b39ab78923", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
0887
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 887" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# disagree: in v3, not v2\n", - "\n", - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " --*\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v3 v3\n", - " where not exists (\n", - " select\n", - " export_id,\n", - " crn\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v2 v2\n", - " where\n", - " v2.crn = v3.r_id\n", - " and v2.export_id = v3.l_id\n", - " );\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "0511e0a2-1b18-4d3b-978f-b9080e30f2a7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
061703
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 61703" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# disagree: in v2, not v3\n", - "\n", - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " --*\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v2 v2\n", - " where not exists (\n", - " select\n", - " l_id,\n", - " r_id\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v3 v3\n", - " where\n", - " v2.crn = v3.r_id\n", - " and v2.export_id = v3.l_id\n", - " );\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "d1d6adc6-5183-4a4e-a297-96d76123c99e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
01422
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 1422" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " --*\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v2 v2\n", - " where not exists (\n", - " select\n", - " v3.cluster\n", - " from\n", - "\n", - " {os.getenv(\"SCHEMA\")}.match_test_v3 v3\n", - " where\n", - " v2.cluster = v3.cluster\n", - " );\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "b6f5de21-5104-4744-ab55-e467dc3bc0c9", - "metadata": {}, - "source": [ - "1422, all bad matches\n", - "\n", - "Cluster 2159702 doesn't exist in the exporters dataset. How has it been matched? Same for 100285" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "22d287ce-a54e-4130-914a-250d85505048", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
count
0805
\n", - "
" - ], - "text/plain": [ - " count\n", - "0 805" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "du.query(f\"\"\"\n", - " select\n", - " count(*)\n", - " --*\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v3 v3\n", - " where not exists (\n", - " select\n", - " v2.cluster\n", - " from\n", - " {os.getenv(\"SCHEMA\")}.match_test_v2 v2\n", - " where\n", - " v2.cluster = v3.cluster\n", - " )\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "5021e6f5-bd2c-4f1f-9a06-de12e0e2a6a9", - "metadata": {}, - "source": [ - "100k rows, good and bad matches.\n", - "\n", - "* 18584 is wrong but uses postcode, and is a fair representation of the match system's belief\n", - "* 3140785 is right but uses postcode -- again, fair representation\n", - "\n", - "**🏆 V3 WINS**" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/models/WL_existingcms-tests.ipynb b/notebooks/models/WL_existingcms-tests.ipynb deleted file mode 100644 index 4e701b7..0000000 --- a/notebooks/models/WL_existingcms-tests.ipynb +++ /dev/null @@ -1,686 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "b239cd5d-5c6b-4370-9e9f-662ffae4d58f", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame, display\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "markdown", - "id": "865b59b8-fc8e-4402-97c5-1192dba6fd42", - "metadata": {}, - "source": [ - "# ExistingCMSPlus linker\n", - "\n", - "A place to fix and test the existing CMS+ linker." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "bb0f2cfd-4aa9-483c-99fa-49152aefaad0", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "DEBUG:git.cmd:Popen(['git', 'version'], cwd=/home/jovyan/company-matching/notebooks, universal_newlines=False, shell=None, istream=None)\n", - "DEBUG:git.cmd:Popen(['git', 'version'], cwd=/home/jovyan/company-matching/notebooks, universal_newlines=False, shell=None, istream=None)\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from cmf import locations as loc\n", - "from cmf.data import utils as du\n", - "from cmf.data.star import Star\n", - "from cmf.data.datasets import Dataset\n", - "from cmf.data.probabilities import Probabilities\n", - "from cmf.data.clusters import Clusters\n", - "from cmf.data.validation import Validation\n", - "from cmf.link.existingservice_linker import ExistingCMSPlusLinker\n", - "from cmf.features.clean_complex import duckdb_cleaning_factory\n", - "from cmf.features.clean_basic_original import (\n", - " cms_original_clean_company_name_general,\n", - " cms_original_clean_company_name_ch,\n", - " cms_original_clean_postcode,\n", - " cms_original_clean_email,\n", - " cms_original_clean_ch_id,\n", - " cms_original_clean_cdms_id\n", - ")\n", - "\n", - "from dotenv import load_dotenv, find_dotenv\n", - "import os\n", - "import duckdb\n", - "from pathlib import Path\n", - "import pandas as pd\n", - "\n", - "dotenv_path = find_dotenv()\n", - "load_dotenv(dotenv_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "a47bb693-085a-431e-a14b-ab7acf55f773", - "metadata": {}, - "outputs": [], - "source": [ - "star = Star(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"STAR_TABLE\")\n", - ")\n", - "probabilities = Probabilities(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"PROBABILITIES_TABLE\"),\n", - " star = star\n", - ")\n", - "clusters = Clusters(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"CLUSTERS_TABLE\"),\n", - " star = star\n", - ")\n", - "validation = Validation(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"VALIDATE_TABLE\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "f11719a3-9023-4683-8664-542988bd81b3", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp = ExistingCMSPlusLinker(\n", - " name=\"n1_cms_basic\",\n", - " dataset = Dataset(\n", - " star_id=54717,\n", - " star=star\n", - " ), \n", - " probabilities=probabilities, \n", - " clusters=clusters, \n", - " n=1,\n", - " overwrite=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "5c5aa81e-db8a-4b4b-806b-e5f3f0b3a5d7", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp.get_data(\n", - " # sample=5,\n", - " cluster_select={\n", - " '\"companieshouse\".\"companies\"': [\n", - " \"company_name as company_name\",\n", - " \"postcode as postcode\"\n", - " ]\n", - " },\n", - " dim_select=[\n", - " \"id\",\n", - " \"company_name\",\n", - " \"postcode\"\n", - " ],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "4e60f7b1-b6f0-4b1d-8610-b9ee861decf5", - "metadata": {}, - "outputs": [], - "source": [ - "clean_postcode = duckdb_cleaning_factory(cms_original_clean_postcode)\n", - "clean_ch_name = duckdb_cleaning_factory(cms_original_clean_company_name_ch)\n", - "clean_gen_name = duckdb_cleaning_factory(cms_original_clean_company_name_general)\n", - "\n", - "cluster_pipeline={\n", - " \"clean_ch_comp_names\": {\n", - " \"function\": clean_ch_name,\n", - " \"arguments\": {\n", - " \"column\": \"company_name\"\n", - " },\n", - " },\n", - " \"clean_postcode\": {\n", - " \"function\": clean_postcode,\n", - " \"arguments\": {\n", - " \"column\": \"postcode\"\n", - " },\n", - " }\n", - "}\n", - "dim_pipeline={\n", - " \"clean__comp_names\": {\n", - " \"function\": clean_gen_name,\n", - " \"arguments\": {\n", - " \"column\": \"company_name\"\n", - " },\n", - " },\n", - " \"clean_postcode\": {\n", - " \"function\": clean_postcode,\n", - " \"arguments\": {\n", - " \"column\": \"postcode\"\n", - " },\n", - " }\n", - "}\n", - "link_settings={\n", - " \"company_name\": {\n", - " \"cluster\": \"company_name\",\n", - " \"dimension\": \"company_name\",\n", - " \"weight\": 2\n", - " },\n", - " \"postcode\": {\n", - " \"cluster\": \"postcode\",\n", - " \"dimension\": \"postcode\",\n", - " \"weight\": 1\n", - " }\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "2e0df3e0-28fe-439f-bc09-64c7785234ab", - "metadata": {}, - "source": [ - "## Full evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "cbdd3f7c-dda4-4e0b-8bbe-2dbcac89755b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:src.link.linker:Running pipeline\n", - "INFO:src.link.linker:Logging outputs to the Probabilities table\n", - "INFO:src.link.linker:Logging as MLflow experiment\n", - "DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): mlflow--data-science.data.trade.gov.uk:8004\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"GET /api/2.0/mlflow/experiments/get-by-name?experiment_name=cm_hmrc-trade-exporters HTTP/1.1\" 200 245\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"GET /api/2.0/mlflow/experiments/get-by-name?experiment_name=cm_hmrc-trade-exporters HTTP/1.1\" 200 245\n", - "DEBUG:git.util:Failed checking if running in CYGWIN due to: FileNotFoundError(2, 'No such file or directory')\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/create HTTP/1.1\" 200 1116\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/log-batch HTTP/1.1\" 200 2\n", - "INFO:src.link.linker:Running prepare() function\n", - "INFO:src.link.linker:Running link() function\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/log-parameter HTTP/1.1\" 200 2\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/log-metric HTTP/1.1\" 200 2\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/update HTTP/1.1\" 200 423\n", - "INFO:src.link.linker:Writing parameters to /home/jovyan/company-matching/scratch/reports/cm_hmrc-trade-exporters/existing\n", - "INFO:src.link.linker:Writing metrics to /home/jovyan/company-matching/scratch/reports/cm_hmrc-trade-exporters/existing\n", - "INFO:src.link.linker:Writing artefacts to /home/jovyan/company-matching/scratch/reports/cm_hmrc-trade-exporters/existing\n", - "INFO:src.link.linker:Done!\n" - ] - } - ], - "source": [ - "cl_x_exp.evaluate(\n", - " link_experiment=\"cm_hmrc-trade-exporters\",\n", - " evaluation_description=\"\"\"\n", - " - Existing CMS for name/postcode\n", - " - Cleaned name as per existing CMS rules\n", - " - Name match double weighted\n", - " \"\"\",\n", - " prepare_kwargs={\n", - " \"cluster_pipeline\": cluster_pipeline,\n", - " \"dim_pipeline\": dim_pipeline,\n", - " \"link_settings\": link_settings\n", - " },\n", - " link_kwargs={\n", - " \"threshold\": 0.5\n", - " },\n", - " report_dir=Path(\n", - " loc.PROJECT_DIR, \n", - " 'scratch', \n", - " 'reports', \n", - " 'cm_hmrc-trade-exporters',\n", - " 'existing'\n", - " ),\n", - " log_mlflow=True,\n", - " log_output=True,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "3dd4326c-e40d-4a96-bc07-aa376c4c3d74", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "## Prepare data" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "a156528b-349e-405a-82ea-b78dec6f8c7e", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp.prepare(\n", - " cluster_pipeline=cluster_pipeline,\n", - " dim_pipeline=dim_pipeline,\n", - " link_settings=link_settings\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "956ab425-a7de-4be2-988b-32240a94f81c", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namepostcode
02937257194localvtge16sa
121248281953ec1y8jl
225703781953sw32er
31537512195mphlmiteds92tj
43359348195mphlmiteddn227wf
\n", - "
" - ], - "text/plain": [ - " id company_name postcode\n", - "0 2937257 194localvtg e16sa\n", - "1 2124828 1953 ec1y8jl\n", - "2 2570378 1953 sw32er\n", - "3 1537512 195mphlmited s92tj\n", - "4 3359348 195mphlmited dn227wf" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namepostcode
009bc545d-236a-4d15-9073-49ddabdc4b50109uptonroadmanagementca89lz
10d4d985a-c15b-43e2-8142-c8f868025e1a1093eding403px
2cb9118f3-556c-4b1d-b3b5-ceaaa29993b81094874s11wf
31016c07d-2330-4076-ab7d-21ab6900a431109londonss11eg
4c0e587ab-4b94-4c70-bcd0-b57b8c64ca211010gamesbl14qr
\n", - "
" - ], - "text/plain": [ - " id company_name postcode\n", - "0 09bc545d-236a-4d15-9073-49ddabdc4b50 109uptonroadmanagement ca89lz\n", - "1 0d4d985a-c15b-43e2-8142-c8f868025e1a 1093edin g403px\n", - "2 cb9118f3-556c-4b1d-b3b5-ceaaa29993b8 1094874 s11wf\n", - "3 1016c07d-2330-4076-ab7d-21ab6900a431 109london ss11eg\n", - "4 c0e587ab-4b94-4c70-bcd0-b57b8c64ca21 1010games bl14qr" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cl_x_exp.dim_processed.head(5)\n", - "cl_x_exp.cluster_processed.head(5)" - ] - }, - { - "cell_type": "markdown", - "id": "8be87bdf-8b75-4811-b01a-5976b413e8ad", - "metadata": {}, - "source": [ - "## Link data" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "fe859234-36e5-4f37-be56-c532de97a87e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
clusteridsourceprobabilityuuidlink_typemodel
035f7dbe2-2eb1-4df7-b50f-041cbc5d246e1077171547170.333333029e3732-4154-4cb2-98b4-8e64e1196c31linkn1_cms_basic
1f33a7ab9-85b2-482e-8c02-5ca791ba76fc1077171547170.333333a9ed34c6-6018-496f-9617-3c62ac0b36cdlinkn1_cms_basic
2524e2a90-bc77-4b3d-992b-53ecd6dbc7921077171547170.3333330b6f4cf7-aecc-4f90-93ed-0bd7e0e21922linkn1_cms_basic
3b2c53705-e74c-47ef-9b32-94cb941ba3931077171547170.3333334910d8b9-ee29-4a9a-a873-a2135c27d3f0linkn1_cms_basic
4e9c1fd4e-b2ec-489a-8c94-5cdcc7a7ac1e1077171547170.333333f930bf59-dea7-4bb3-afb3-f0fd72a244f8linkn1_cms_basic
........................
2455540e90228b-bde8-4f8b-a1db-cdbf233475a72260542547170.33333303a1ff06-dda2-4c8f-8696-bf62218fdf70linkn1_cms_basic
24555526383f41-63d7-400d-97c0-3703b14584c52480128547170.333333a6dbf884-a09b-46d1-9cc4-64f3b587c241linkn1_cms_basic
2455568f6d6a90-a62e-4fc3-bcd1-cc6832c6f2182480128547170.333333d6f48469-11d3-4765-9e0d-852323f82c6elinkn1_cms_basic
245557261972d3-5c94-4e03-9ff9-e00d4f4099c63056055547170.333333acd959d2-0f04-4084-adc9-93a363bb2d24linkn1_cms_basic
2455583a115a0c-c05b-4e2d-b105-d317ce4bda362722547170.333333a36e5b34-e9cb-46d9-bf01-ce9935aff1f3linkn1_cms_basic
\n", - "

245559 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " cluster id source probability \\\n", - "0 35f7dbe2-2eb1-4df7-b50f-041cbc5d246e 1077171 54717 0.333333 \n", - "1 f33a7ab9-85b2-482e-8c02-5ca791ba76fc 1077171 54717 0.333333 \n", - "2 524e2a90-bc77-4b3d-992b-53ecd6dbc792 1077171 54717 0.333333 \n", - "3 b2c53705-e74c-47ef-9b32-94cb941ba393 1077171 54717 0.333333 \n", - "4 e9c1fd4e-b2ec-489a-8c94-5cdcc7a7ac1e 1077171 54717 0.333333 \n", - "... ... ... ... ... \n", - "245554 0e90228b-bde8-4f8b-a1db-cdbf233475a7 2260542 54717 0.333333 \n", - "245555 26383f41-63d7-400d-97c0-3703b14584c5 2480128 54717 0.333333 \n", - "245556 8f6d6a90-a62e-4fc3-bcd1-cc6832c6f218 2480128 54717 0.333333 \n", - "245557 261972d3-5c94-4e03-9ff9-e00d4f4099c6 3056055 54717 0.333333 \n", - "245558 3a115a0c-c05b-4e2d-b105-d317ce4bda36 2722 54717 0.333333 \n", - "\n", - " uuid link_type model \n", - "0 029e3732-4154-4cb2-98b4-8e64e1196c31 link n1_cms_basic \n", - "1 a9ed34c6-6018-496f-9617-3c62ac0b36cd link n1_cms_basic \n", - "2 0b6f4cf7-aecc-4f90-93ed-0bd7e0e21922 link n1_cms_basic \n", - "3 4910d8b9-ee29-4a9a-a873-a2135c27d3f0 link n1_cms_basic \n", - "4 f930bf59-dea7-4bb3-afb3-f0fd72a244f8 link n1_cms_basic \n", - "... ... ... ... \n", - "245554 03a1ff06-dda2-4c8f-8696-bf62218fdf70 link n1_cms_basic \n", - "245555 a6dbf884-a09b-46d1-9cc4-64f3b587c241 link n1_cms_basic \n", - "245556 d6f48469-11d3-4765-9e0d-852323f82c6e link n1_cms_basic \n", - "245557 acd959d2-0f04-4084-adc9-93a363bb2d24 link n1_cms_basic \n", - "245558 a36e5b34-e9cb-46d9-bf01-ce9935aff1f3 link n1_cms_basic \n", - "\n", - "[245559 rows x 7 columns]" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cl_x_exp.link(\n", - " log_output=True\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/models/WL_hybridadd-matching.ipynb b/notebooks/models/WL_hybridadd-matching.ipynb deleted file mode 100644 index 9689299..0000000 --- a/notebooks/models/WL_hybridadd-matching.ipynb +++ /dev/null @@ -1,1562 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 13, - "id": "fc07efe0-5cb8-47bb-87b4-ab6f4a475f4e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame, display\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "markdown", - "id": "7c6625ba-1e67-45a6-8c13-a3f0b20d023d", - "metadata": {}, - "source": [ - "# 🔌Hybrid additive playground\n", - "\n", - "Just a place to get linkers running." - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "id": "6bb13950-4a12-4f3a-b27e-212984ec41e5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 96, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from cmf import locations as loc\n", - "from cmf.data import utils as du\n", - "from cmf.data.star import Star\n", - "from cmf.data.datasets import Dataset\n", - "from cmf.data.probabilities import Probabilities\n", - "from cmf.data.clusters import Clusters\n", - "from cmf.data.validation import Validation\n", - "from cmf.link.splink_linker import SplinkLinker\n", - "from cmf.config import link_pipeline, stopwords\n", - "from cmf.features.clean_complex import clean_comp_names\n", - "\n", - "from splink.duckdb.linker import DuckDBLinker\n", - "import splink.duckdb.comparison_library as cl\n", - "import splink.duckdb.comparison_template_library as ctl\n", - "\n", - "import uuid\n", - "import types\n", - "from pathlib import Path\n", - "from dotenv import load_dotenv, find_dotenv\n", - "import os\n", - "import io\n", - "import pandas as pd\n", - "import duckdb\n", - "import json\n", - "\n", - "load_dotenv(find_dotenv())" - ] - }, - { - "cell_type": "markdown", - "id": "420e71d7-752e-4df2-a474-1288f6f69812", - "metadata": {}, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "id": "fa975078-979e-4a98-bbea-1df8a21b57d8", - "metadata": {}, - "outputs": [], - "source": [ - "star = Star(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"STAR_TABLE\")\n", - ")\n", - "probabilities = Probabilities(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"PROBABILITIES_TABLE\"),\n", - " star = star\n", - ")\n", - "clusters = Clusters(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"CLUSTERS_TABLE\"),\n", - " star = star\n", - ")\n", - "validation = Validation(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"VALIDATE_TABLE\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "id": "e91b3a20-c117-48b5-8472-37f9f63e5a52", - "metadata": {}, - "outputs": [], - "source": [ - "cluster_pipeline={\n", - " \"clean_comp_names\": {\n", - " \"function\": clean_comp_names,\n", - " \"arguments\": {\n", - " \"primary_col\": \"company_name\",\n", - " \"secondary_col\": None,\n", - " \"stopwords\": stopwords,\n", - " },\n", - " }\n", - "}\n", - "dim_pipeline={\n", - " \"clean_comp_names\": {\n", - " \"function\": clean_comp_names,\n", - " \"arguments\": {\n", - " \"primary_col\": \"company_name\",\n", - " \"secondary_col\": None,\n", - " \"stopwords\": stopwords,\n", - " },\n", - " }\n", - "}\n", - "linker_settings={\n", - " \"link_type\": \"link_only\",\n", - " \"unique_id_column_name\": \"id\",\n", - " \"retain_matching_columns\": False,\n", - " \"retain_intermediate_calculation_columns\": False,\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " \"\"\"\n", - " (l.company_name = r.company_name)\n", - " and (\n", - " l.company_name <> ''\n", - " and r.company_name <> ''\n", - " )\n", - " \"\"\",\n", - " \"\"\"\n", - " (l.postcode = r.postcode)\n", - " and (\n", - " l.postcode <> ''\n", - " and r.postcode <> ''\n", - " )\n", - " \"\"\",\n", - " ],\n", - " \"comparisons\": [\n", - " cl.jaro_winkler_at_thresholds(\n", - " \"company_name\", [0.9, 0.6], term_frequency_adjustments=True\n", - " ),\n", - " ctl.postcode_comparison(\"postcode\"),\n", - " ],\n", - "}\n", - "train_pipeline={\n", - " \"estimate_probability_two_random_records_match\": {\n", - " \"function\": \"estimate_probability_two_random_records_match\",\n", - " \"arguments\": {\n", - " \"deterministic_matching_rules\": \"\"\"\n", - " l.company_name = r.company_name\n", - " \"\"\",\n", - " \"recall\": 0.7,\n", - " },\n", - " },\n", - " \"estimate_u_using_random_sampling\": {\n", - " \"function\": \"estimate_u_using_random_sampling\",\n", - " \"arguments\": {\"max_pairs\": 1e6},\n", - " },\n", - " \"estimate_parameters_using_expectation_maximisation\": {\n", - " \"function\": \"estimate_parameters_using_expectation_maximisation\",\n", - " \"arguments\": {\n", - " \"blocking_rule\": \"\"\"\n", - " l.company_name = r.company_name\n", - " \"\"\"\n", - " },\n", - " },\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "eadc61f0-b869-49ac-bc3c-1d74f969198c", - "metadata": {}, - "source": [ - "## Splink" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "id": "d0f5cb4d-ca66-4c00-8735-7df24163b676", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp = SplinkLinker(\n", - " name=\"exp_n2_splink_basic\",\n", - " dataset = Dataset(\n", - " star_id=54717,\n", - " star=star\n", - " ), \n", - " probabilities=probabilities, \n", - " clusters=clusters, \n", - " n=1,\n", - " db_path=du.DEFAULT_DUCKDB_PATH.as_posix(),\n", - " overwrite=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "id": "b504bec4-4c95-441a-8629-7a5fcc1f58cf", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "cl_x_exp.get_data(\n", - " # sample=5,\n", - " cluster_select={\n", - " '\"companieshouse\".\"companies\"': [\n", - " \"company_name as company_name\",\n", - " \"postcode as postcode\"\n", - " ]\n", - " },\n", - " dim_select=[\n", - " \"id\",\n", - " \"company_name\",\n", - " \"postcode\"\n", - " ],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "id": "4fccd6e3-1938-4571-a793-5a053da82c5d", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:src.link.linker:Running pipeline\n", - "INFO:src.link.linker:Logging outputs to the Probabilities table\n", - "INFO:src.link.linker:Logging as MLflow experiment\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"GET /api/2.0/mlflow/experiments/get-by-name?experiment_name=cm_hmrc-trade-exporters HTTP/1.1\" 200 245\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"GET /api/2.0/mlflow/experiments/get-by-name?experiment_name=cm_hmrc-trade-exporters HTTP/1.1\" 200 245\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/create HTTP/1.1\" 200 1033\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/log-batch HTTP/1.1\" 200 2\n", - "INFO:src.link.linker:Running prepare() function\n", - "/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1846: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n", - "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n", - " return np.find_common_type(types, []) # type: ignore[arg-type]\n", - "INFO:splink.linker:Probability two random records match is estimated to be 2.25e-07.\n", - "This means that amongst all possible pairwise record comparisons, one in 4,448,386.84 are expected to match. With 1,396,647,305,670 total possible comparisons, we expect a total of around 313,967.14 matching pairs\n", - "INFO:splink.estimate_u:----- Estimating u probabilities using random sampling -----\n", - "INFO:splink.m_u_records_to_parameters:u probability not trained for company_name - Exact match (comparison vector value: 3). This usually means the comparison level was never observed in the training data.\n", - "INFO:splink.estimate_u:\n", - "Estimated u probabilities using random sampling\n", - "INFO:splink.settings:\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - company_name (some u values are not trained, no m values are trained).\n", - " - postcode (no m values are trained).\n", - "INFO:splink.em_training_session:\n", - "----- Starting EM training session -----\n", - "\n", - "INFO:splink.em_training_session:Estimating the m probabilities of the model by blocking on:\n", - "\n", - " l.company_name = r.company_name\n", - " \n", - "\n", - "Parameter estimates will be made for the following comparison(s):\n", - " - postcode\n", - "\n", - "Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: \n", - " - company_name\n", - "INFO:splink.expectation_maximisation:\n", - "INFO:splink.expectation_maximisation:Iteration 1: Largest change in params was 0.42 in probability_two_random_records_match\n", - "INFO:splink.expectation_maximisation:Iteration 2: Largest change in params was -0.0984 in the m_probability of postcode, level `Exact match postcode`\n", - "INFO:splink.expectation_maximisation:Iteration 3: Largest change in params was -0.0612 in the m_probability of postcode, level `Exact match postcode`\n", - "INFO:splink.expectation_maximisation:Iteration 4: Largest change in params was 0.131 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "INFO:splink.expectation_maximisation:Iteration 5: Largest change in params was 0.0392 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "INFO:splink.expectation_maximisation:Iteration 6: Largest change in params was 0.00208 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "INFO:splink.expectation_maximisation:Iteration 7: Largest change in params was 8.92e-05 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "INFO:splink.expectation_maximisation:\n", - "EM converged after 7 iterations\n", - "INFO:splink.settings:\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - company_name (some u values are not trained, no m values are trained).\n", - "INFO:src.link.linker:Running link() function\n", - "WARNING:splink.linker:\n", - " -- WARNING --\n", - "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. To produce predictions the following untrained trained parameters will use default values.\n", - "Comparison: 'company_name':\n", - " m values not fully trained\n", - "Comparison: 'company_name':\n", - " u values not fully trained\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"GET /api/2.0/mlflow/runs/get?run_uuid=9b8390845a1d4e46b92b77ca3ac0675e&run_id=9b8390845a1d4e46b92b77ca3ac0675e HTTP/1.1\" 200 1180\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"PUT /api/2.0/mlflow-artifacts/artifacts/4/9b8390845a1d4e46b92b77ca3ac0675e/artifacts/config/train_pipeline_ejq7d7ty.json HTTP/1.1\" 200 2\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"PUT /api/2.0/mlflow-artifacts/artifacts/4/9b8390845a1d4e46b92b77ca3ac0675e/artifacts/model/model_iwb3ikq3.json HTTP/1.1\" 200 2\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/log-parameter HTTP/1.1\" 200 2\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/update HTTP/1.1\" 200 430\n", - "INFO:src.link.linker:Writing parameters to /home/jovyan/company-matching/scratch/reports/cm_hmrc-trade-exporters\n", - "INFO:src.link.linker:Writing metrics to /home/jovyan/company-matching/scratch/reports/cm_hmrc-trade-exporters\n", - "INFO:src.link.linker:Writing artefacts to /home/jovyan/company-matching/scratch/reports/cm_hmrc-trade-exporters\n", - "INFO:src.link.linker:Done!\n" - ] - } - ], - "source": [ - "cl_x_exp.evaluate(\n", - " link_experiment=\"cm_hmrc-trade-exporters\",\n", - " evaluation_description=\"Simple company name clean, nothing else\",\n", - " prepare_kwargs={\n", - " \"cluster_pipeline\": cluster_pipeline,\n", - " \"dim_pipeline\": dim_pipeline,\n", - " \"linker_settings\": linker_settings,\n", - " \"train_pipeline\": train_pipeline\n", - " },\n", - " link_kwargs={\n", - " \"threshold\": 0.7\n", - " },\n", - " report_dir=Path(\n", - " loc.PROJECT_DIR, \n", - " 'scratch', \n", - " 'reports', \n", - " 'cm_hmrc-trade-exporters',\n", - " 'splink'\n", - " ),\n", - " log_mlflow=True,\n", - " log_output=True,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "id": "30b105ca-d789-46c6-b5e0-0e357c368528", - "metadata": {}, - "outputs": [], - "source": [ - "clusters.add_clusters(\n", - " probabilities=probabilities,\n", - " models=cl_x_exp.name,\n", - " validation=validation,\n", - " n=cl_x_exp.n,\n", - " threshold=0.7,\n", - " add_unmatched_dims=True,\n", - " overwrite=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "id": "576b4b70-daf9-4cf4-90c5-dfbc9d98dee8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
clustercompany_namesic_code_1address
0007d1c59-f9de-4296-be33-8e3ab5670764\"WORLD TO ME\" LIMITED46190 - Agents involved in the sale of a varie...None
1f96a7566-a196-4f04-a07f-f153eff1b7fb\"YES, DEAR!\" LIMITED59120 - Motion picture, video and television p...None
27df5fc6a-75cd-4437-a48d-55278f0cff7f#FOREVER20 FOUNDATION CIONone SuppliedNone
3f5af782f-30fd-47e5-a222-00e64a02b800& SO THEY MADE LTD47910 - Retail sale via mail order houses or v...None
405568913-e065-4f9c-9e0a-79aa992cbb1d& TONIC LIMITED73110 - Advertising agenciesNone
...............
2864123c9f641a-7788-4492-885e-b70a4cbf6845NoneNone[METRIC HOUSE, WESTMEAD INDUSTRIAL ESTATE, WES...
286413ba4036cb-0544-4018-9f9e-baf3969f064fNoneNone[70 ARMAGH ROAD, DUNGANNON]
286414b744abcb-efdb-4a05-8f9c-3ad0127750f8NoneNone[HM REVENUE AND CUSTOMS, RUBY HOUSE, 8 RUBY PL...
2864154608fe3d-a859-4d6d-91cf-ceac8e603d29NoneNone[THE OLD COACH HOUSE HORSE FA R, UGELEY, STAFF...
286416ed0972e6-9c5c-4505-b48f-40e1f9afdfd2NoneNone[HM REVENUE AND CUSTOMS, RUBY HOUSE, 8 RUBY PL...
\n", - "

286417 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " cluster company_name \\\n", - "0 007d1c59-f9de-4296-be33-8e3ab5670764 \"WORLD TO ME\" LIMITED \n", - "1 f96a7566-a196-4f04-a07f-f153eff1b7fb \"YES, DEAR!\" LIMITED \n", - "2 7df5fc6a-75cd-4437-a48d-55278f0cff7f #FOREVER20 FOUNDATION CIO \n", - "3 f5af782f-30fd-47e5-a222-00e64a02b800 & SO THEY MADE LTD \n", - "4 05568913-e065-4f9c-9e0a-79aa992cbb1d & TONIC LIMITED \n", - "... ... ... \n", - "286412 3c9f641a-7788-4492-885e-b70a4cbf6845 None \n", - "286413 ba4036cb-0544-4018-9f9e-baf3969f064f None \n", - "286414 b744abcb-efdb-4a05-8f9c-3ad0127750f8 None \n", - "286415 4608fe3d-a859-4d6d-91cf-ceac8e603d29 None \n", - "286416 ed0972e6-9c5c-4505-b48f-40e1f9afdfd2 None \n", - "\n", - " sic_code_1 \\\n", - "0 46190 - Agents involved in the sale of a varie... \n", - "1 59120 - Motion picture, video and television p... \n", - "2 None Supplied \n", - "3 47910 - Retail sale via mail order houses or v... \n", - "4 73110 - Advertising agencies \n", - "... ... \n", - "286412 None \n", - "286413 None \n", - "286414 None \n", - "286415 None \n", - "286416 None \n", - "\n", - " address \n", - "0 None \n", - "1 None \n", - "2 None \n", - "3 None \n", - "4 None \n", - "... ... \n", - "286412 [METRIC HOUSE, WESTMEAD INDUSTRIAL ESTATE, WES... \n", - "286413 [70 ARMAGH ROAD, DUNGANNON] \n", - "286414 [HM REVENUE AND CUSTOMS, RUBY HOUSE, 8 RUBY PL... \n", - "286415 [THE OLD COACH HOUSE HORSE FA R, UGELEY, STAFF... \n", - "286416 [HM REVENUE AND CUSTOMS, RUBY HOUSE, 8 RUBY PL... \n", - "\n", - "[286417 rows x 4 columns]" - ] - }, - "execution_count": 98, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clusters.get_data(\n", - " select={\n", - " '\"companieshouse\".\"companies\"': [\n", - " \"company_name\",\n", - " \"sic_code_1\"\n", - " ],\n", - " '\"hmrc\".\"trade__exporters\"': [\n", - " \"address\"\n", - " ]\n", - " },\n", - " sample=5\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "390b2c9f-9f82-41c1-a56d-8c405322487f", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "## First level functions (within `evaluate()`)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7398eefd-d41d-4b2e-956b-3d9b933e9b9a", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1846: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n", - "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n", - " return np.find_common_type(types, []) # type: ignore[arg-type]\n" - ] - } - ], - "source": [ - "cl_x_exp.prepare(\n", - " low_memory=True,\n", - " cluster_pipeline={\n", - " \"clean_comp_names\": {\n", - " \"function\": clean_comp_names,\n", - " \"arguments\": {\n", - " \"primary_col\": \"company_name\",\n", - " \"secondary_col\": None,\n", - " \"stopwords\": stopwords,\n", - " },\n", - " }\n", - " },\n", - " dim_pipeline={\n", - " \"clean_comp_names\": {\n", - " \"function\": clean_comp_names,\n", - " \"arguments\": {\n", - " \"primary_col\": \"company_name\",\n", - " \"secondary_col\": None,\n", - " \"stopwords\": stopwords,\n", - " },\n", - " }\n", - " },\n", - " linker_settings={\n", - " \"link_type\": \"link_only\",\n", - " \"unique_id_column_name\": \"id\",\n", - " \"retain_matching_columns\": False,\n", - " \"retain_intermediate_calculation_columns\": False,\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " \"\"\"\n", - " (l.company_name = r.company_name)\n", - " and (\n", - " l.company_name <> ''\n", - " and r.company_name <> ''\n", - " )\n", - " \"\"\",\n", - " \"\"\"\n", - " (l.postcode = r.postcode)\n", - " and (\n", - " l.postcode <> ''\n", - " and r.postcode <> ''\n", - " )\n", - " \"\"\",\n", - " ],\n", - " \"comparisons\": [\n", - " cl.jaro_winkler_at_thresholds(\n", - " \"company_name\", [0.9, 0.6], term_frequency_adjustments=True\n", - " ),\n", - " ctl.postcode_comparison(\"postcode\"),\n", - " ],\n", - " },\n", - " train_pipeline={\n", - " \"estimate_probability_two_random_records_match\": {\n", - " \"function\": \"estimate_probability_two_random_records_match\",\n", - " \"arguments\": {\n", - " \"deterministic_matching_rules\": \"\"\"\n", - " l.company_name = r.company_name\n", - " \"\"\",\n", - " \"recall\": 0.7,\n", - " },\n", - " },\n", - " \"estimate_u_using_random_sampling\": {\n", - " \"function\": \"estimate_u_using_random_sampling\",\n", - " \"arguments\": {\"max_pairs\": 1e6},\n", - " },\n", - " \"estimate_parameters_using_expectation_maximisation\": {\n", - " \"function\": \"estimate_parameters_using_expectation_maximisation\",\n", - " \"arguments\": {\n", - " \"blocking_rule\": \"\"\"\n", - " l.company_name = r.company_name\n", - " \"\"\"\n", - " },\n", - " },\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a8bd7db5-6147-4872-81ea-20e51178e400", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp.link(threshold=0.7, log_output=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6fc4e215-126b-4e8e-a8e1-b869f61efcd8", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp.save(path=Path(loc.DATA_SUBDIR['raw'], 'ch_x_exp.pickle'))" - ] - }, - { - "cell_type": "markdown", - "id": "3aa27689-0331-415d-9107-abee6f625556", - "metadata": {}, - "source": [ - "## Second level functions (within `prepare()` and `link()`)" - ] - }, - { - "cell_type": "markdown", - "id": "d9254d63-d447-4abf-a894-39f9a0fbebdd", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "### `prepare()` private methods" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "09843d9a-eb36-49c0-b32e-117b27850760", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp._clean_data(\n", - " cluster_pipeline={\n", - " \"clean_comp_names\": {\n", - " \"function\": clean_comp_names,\n", - " \"arguments\": {\n", - " \"primary_col\": \"company_name\",\n", - " \"secondary_col\": None,\n", - " \"stopwords\": stopwords,\n", - " },\n", - " }\n", - " },\n", - " dim_pipeline={\n", - " \"clean_comp_names\": {\n", - " \"function\": clean_comp_names,\n", - " \"arguments\": {\n", - " \"primary_col\": \"company_name\",\n", - " \"secondary_col\": None,\n", - " \"stopwords\": stopwords,\n", - " },\n", - " }\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "e815b1df-8142-496c-839b-ed1f5634ba14", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1846: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n", - "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n", - " return np.find_common_type(types, []) # type: ignore[arg-type]\n" - ] - } - ], - "source": [ - "cl_x_exp._substitute_ids()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "7d1b0797-fc7a-450e-9bad-3ca8b1b0beeb", - "metadata": {}, - "outputs": [], - "source": [ - "# def _register_tables(self):\n", - "# self.con.register('cls', self.cluster_processed)\n", - "# self.con.register('dim', self.dim_processed)\n", - "\n", - "# cl_x_exp._register_tables = types.MethodType(_register_tables, cl_x_exp)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "0a5ac300-98d5-4505-84fa-ff838a879c35", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp._register_tables()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "efafdbcd-7091-40f8-b2e3-6dfe7072fd02", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp._create_linker(\n", - " linker_settings={\n", - " \"link_type\": \"link_only\",\n", - " \"unique_id_column_name\": \"id\",\n", - " \"retain_matching_columns\": False,\n", - " \"retain_intermediate_calculation_columns\": False,\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " \"\"\"\n", - " (l.company_name = r.company_name)\n", - " and (\n", - " l.company_name <> ''\n", - " and r.company_name <> ''\n", - " )\n", - " \"\"\",\n", - " \"\"\"\n", - " (l.postcode = r.postcode)\n", - " and (\n", - " l.postcode <> ''\n", - " and r.postcode <> ''\n", - " )\n", - " \"\"\",\n", - " ],\n", - " \"comparisons\": [\n", - " cl.jaro_winkler_at_thresholds(\n", - " \"company_name\", [0.9, 0.6], term_frequency_adjustments=True\n", - " ),\n", - " ctl.postcode_comparison(\"postcode\"),\n", - " ],\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "340d1232-6b50-458f-9b53-711ae4a6ae85", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:splink.linker:Probability two random records match is estimated to be 2.25e-07.\n", - "This means that amongst all possible pairwise record comparisons, one in 4,447,653.50 are expected to match. With 2,764,864,733,924 total possible comparisons, we expect a total of around 621,645.71 matching pairs\n", - "INFO:splink.estimate_u:----- Estimating u probabilities using random sampling -----\n", - "INFO:splink.estimate_u:\n", - "Estimated u probabilities using random sampling\n", - "INFO:splink.settings:\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - company_name (no m values are trained).\n", - " - postcode (no m values are trained).\n", - "INFO:splink.em_training_session:\n", - "----- Starting EM training session -----\n", - "\n", - "INFO:splink.em_training_session:Estimating the m probabilities of the model by blocking on:\n", - "\n", - " l.company_name = r.company_name\n", - " \n", - "\n", - "Parameter estimates will be made for the following comparison(s):\n", - " - postcode\n", - "\n", - "Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: \n", - " - company_name\n", - "INFO:splink.expectation_maximisation:\n", - "INFO:splink.expectation_maximisation:Iteration 1: Largest change in params was 0.33 in probability_two_random_records_match\n", - "INFO:splink.expectation_maximisation:Iteration 2: Largest change in params was 0.123 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "INFO:splink.expectation_maximisation:Iteration 3: Largest change in params was 0.0327 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "INFO:splink.expectation_maximisation:Iteration 4: Largest change in params was 0.00215 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "INFO:splink.expectation_maximisation:Iteration 5: Largest change in params was 0.000119 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "INFO:splink.expectation_maximisation:Iteration 6: Largest change in params was 6.48e-06 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "INFO:splink.expectation_maximisation:\n", - "EM converged after 6 iterations\n", - "INFO:splink.settings:\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - company_name (no m values are trained).\n" - ] - } - ], - "source": [ - "cl_x_exp._train_linker(\n", - " train_pipeline={\n", - " \"estimate_probability_two_random_records_match\": {\n", - " \"function\": \"estimate_probability_two_random_records_match\",\n", - " \"arguments\": {\n", - " \"deterministic_matching_rules\": \"\"\"\n", - " l.company_name = r.company_name\n", - " \"\"\",\n", - " \"recall\": 0.7,\n", - " },\n", - " },\n", - " \"estimate_u_using_random_sampling\": {\n", - " \"function\": \"estimate_u_using_random_sampling\",\n", - " \"arguments\": {\"max_pairs\": 1e6},\n", - " },\n", - " \"estimate_parameters_using_expectation_maximisation\": {\n", - " \"function\": \"estimate_parameters_using_expectation_maximisation\",\n", - " \"arguments\": {\n", - " \"blocking_rule\": \"\"\"\n", - " l.company_name = r.company_name\n", - " \"\"\"\n", - " },\n", - " },\n", - " }\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "af78b0ce-9ae9-4526-a655-5016c97a54af", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "### `link()` private methods" - ] - }, - { - "cell_type": "markdown", - "id": "82abf325-fbd9-4c19-9b60-aa7506d7d814", - "metadata": {}, - "source": [ - "* Preds stuff\n", - " * Make preds\n", - " * Rejoin IDs\n", - " * Send to probs table\n", - " * Log params\n", - " * Log metrics (none yet, nothing to eval against)\n", - "* Model stuff\n", - " * Add model uuid to predictions table\n", - " * Add model table to hold model name\n", - " * Update unit tests to deal with this" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "33c5180b-f290-4750-b474-de013248ad69", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 10774831 entries, 0 to 10774830\n", - "Data columns (total 3 columns):\n", - " # Column Dtype \n", - "--- ------ ----- \n", - " 0 id int64 \n", - " 1 company_name object\n", - " 2 postcode object\n", - "dtypes: int64(1), object(2)\n", - "memory usage: 246.6+ MB\n", - "\n", - "RangeIndex: 256604 entries, 0 to 256603\n", - "Data columns (total 3 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 256604 non-null int64 \n", - " 1 company_name 256596 non-null object\n", - " 2 postcode 256604 non-null object\n", - "dtypes: int64(1), object(2)\n", - "memory usage: 5.9+ MB\n" - ] - } - ], - "source": [ - "cl_x_exp.cluster_processed.info()\n", - "cl_x_exp.dim_processed.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "7a51ac76-0c5a-45b7-8632-a096f1bd015b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:splink.linker:\n", - " -- WARNING --\n", - "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. To produce predictions the following untrained trained parameters will use default values.\n", - "Comparison: 'company_name':\n", - " m values not fully trained\n", - "Comparison: 'company_name':\n", - " u values not fully trained\n" - ] - } - ], - "source": [ - "preds = cl_x_exp.linker.predict(threshold_match_probability=0.7)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "729069be-eb3e-44c0-9d3a-f5397ef0ca4f", - "metadata": {}, - "outputs": [], - "source": [ - "# {\"cluster\", \"id\", \"probability\", \"source\"}" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "73892295-4ca8-4775-934e-bfc0ff6d450a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
duckdb_idid
0051f3f15d-ea7c-44d9-889c-e6e77918e886
\n", - "
" - ], - "text/plain": [ - " duckdb_id id\n", - "0 0 51f3f15d-ea7c-44d9-889c-e6e77918e886" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cl_x_exp.id_lookup.head(1)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "e847a099-e283-4160-9286-be371b7228df", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
clusteridprobabilitysource
03ce18bbd-ca73-4173-bb1f-91e0441e522332418080.99825154717
1a3f03e2d-f976-4596-b345-17b13946bd711916210.84384954717
28b37d820-cfdb-4c28-9910-c0fa6e1bf7731128950.99825154717
35259db87-4701-4209-9ac9-36b4c0d11dd729974990.99825154717
41bb90293-52dd-45a3-95f1-219264d7b76024112540.99738054717
\n", - "
" - ], - "text/plain": [ - " cluster id probability source\n", - "0 3ce18bbd-ca73-4173-bb1f-91e0441e5223 3241808 0.998251 54717\n", - "1 a3f03e2d-f976-4596-b345-17b13946bd71 191621 0.843849 54717\n", - "2 8b37d820-cfdb-4c28-9910-c0fa6e1bf773 112895 0.998251 54717\n", - "3 5259db87-4701-4209-9ac9-36b4c0d11dd7 2997499 0.998251 54717\n", - "4 1bb90293-52dd-45a3-95f1-219264d7b760 2411254 0.997380 54717" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "probs = (\n", - " preds\n", - " .as_pandas_dataframe()\n", - " .merge(\n", - " right=cl_x_exp.id_lookup.rename(columns={\"id\": \"cluster\"}),\n", - " how=\"left\",\n", - " left_on=\"id_l\",\n", - " right_on=\"duckdb_id\"\n", - " )\n", - " .merge(\n", - " right=cl_x_exp.id_lookup,\n", - " how=\"left\",\n", - " left_on=\"id_r\",\n", - " right_on=\"duckdb_id\"\n", - " )\n", - " .rename(\n", - " columns={\n", - " \"match_probability\": \"probability\"\n", - " }\n", - " )\n", - ")[['cluster', 'id', 'probability']]\n", - "probs[\"source\"] = cl_x_exp.dataset.id\n", - "probs.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "cb198fb6-7a5f-42fa-bc16-dda1f325e4ca", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
clusteridprobabilitysourceuuidlink_type
03ce18bbd-ca73-4173-bb1f-91e0441e522332418080.9982515471751d9eeff-46d4-48be-b7cc-24161549450dlink
1a3f03e2d-f976-4596-b345-17b13946bd711916210.84384954717b222c116-2ba4-48d0-9696-f758b0b9c2cclink
28b37d820-cfdb-4c28-9910-c0fa6e1bf7731128950.9982515471710319d55-52b2-43e7-8afe-95ab59a76fa1link
35259db87-4701-4209-9ac9-36b4c0d11dd729974990.99825154717fc2ce3e6-1ac2-433d-b8c5-e4bdc405dea8link
41bb90293-52dd-45a3-95f1-219264d7b76024112540.997380547178717545c-219f-4208-a8b5-df4eb2234e56link
.....................
6707b4aac6e-1f05-4665-9631-5d33fd4632db32706890.8055095471791fb7cdd-9fff-4c38-9956-ff4ae48853f4link
671fd2b2db0-ba8a-4559-831e-06f7574d6e0d1606810.8055095471719bc1708-5513-489d-a356-3f3cb17ce0c2link
672841949c0-3a7c-4dae-afbf-8b5d13b1117f16826570.80550954717fd48fa28-5f17-496d-b11b-143c4dba5651link
673841949c0-3a7c-4dae-afbf-8b5d13b1117f31937420.80550954717b04ca25b-6338-4c0b-b3af-a52ff15541felink
674de0cbf64-20e3-4de8-b01b-7e1289a35f6627513130.8055095471700a8cf34-bb72-4266-855d-e8e1c28e0af7link
\n", - "

675 rows × 6 columns

\n", - "
" - ], - "text/plain": [ - " cluster id probability source \\\n", - "0 3ce18bbd-ca73-4173-bb1f-91e0441e5223 3241808 0.998251 54717 \n", - "1 a3f03e2d-f976-4596-b345-17b13946bd71 191621 0.843849 54717 \n", - "2 8b37d820-cfdb-4c28-9910-c0fa6e1bf773 112895 0.998251 54717 \n", - "3 5259db87-4701-4209-9ac9-36b4c0d11dd7 2997499 0.998251 54717 \n", - "4 1bb90293-52dd-45a3-95f1-219264d7b760 2411254 0.997380 54717 \n", - ".. ... ... ... ... \n", - "670 7b4aac6e-1f05-4665-9631-5d33fd4632db 3270689 0.805509 54717 \n", - "671 fd2b2db0-ba8a-4559-831e-06f7574d6e0d 160681 0.805509 54717 \n", - "672 841949c0-3a7c-4dae-afbf-8b5d13b1117f 1682657 0.805509 54717 \n", - "673 841949c0-3a7c-4dae-afbf-8b5d13b1117f 3193742 0.805509 54717 \n", - "674 de0cbf64-20e3-4de8-b01b-7e1289a35f66 2751313 0.805509 54717 \n", - "\n", - " uuid link_type \n", - "0 51d9eeff-46d4-48be-b7cc-24161549450d link \n", - "1 b222c116-2ba4-48d0-9696-f758b0b9c2cc link \n", - "2 10319d55-52b2-43e7-8afe-95ab59a76fa1 link \n", - "3 fc2ce3e6-1ac2-433d-b8c5-e4bdc405dea8 link \n", - "4 8717545c-219f-4208-a8b5-df4eb2234e56 link \n", - ".. ... ... \n", - "670 91fb7cdd-9fff-4c38-9956-ff4ae48853f4 link \n", - "671 19bc1708-5513-489d-a356-3f3cb17ce0c2 link \n", - "672 fd48fa28-5f17-496d-b11b-143c4dba5651 link \n", - "673 b04ca25b-6338-4c0b-b3af-a52ff15541fe link \n", - "674 00a8cf34-bb72-4266-855d-e8e1c28e0af7 link \n", - "\n", - "[675 rows x 6 columns]" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "probabilities.add_probabilities(probs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7717d9d8-2584-4ad6-8316-0e470fd9ec66", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp.id_lookup" - ] - }, - { - "cell_type": "markdown", - "id": "385ce74b-d023-43c9-a3bf-6c25aa85807f", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "### `evaluate()`\n", - "\n", - "Implemented in the Linker class, this should log stuff to MLFlow. Let's fix it." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "7ff8a268-d9f3-464d-9068-c923192e7c70", - "metadata": {}, - "outputs": [], - "source": [ - "cluster_pipeline={\n", - " \"clean_comp_names\": {\n", - " \"function\": clean_comp_names,\n", - " \"arguments\": {\n", - " \"primary_col\": \"company_name\",\n", - " \"secondary_col\": None,\n", - " \"stopwords\": stopwords,\n", - " },\n", - " }\n", - "}\n", - "dim_pipeline={\n", - " \"clean_comp_names\": {\n", - " \"function\": clean_comp_names,\n", - " \"arguments\": {\n", - " \"primary_col\": \"company_name\",\n", - " \"secondary_col\": None,\n", - " \"stopwords\": stopwords,\n", - " },\n", - " }\n", - "}\n", - "linker_settings={\n", - " \"link_type\": \"link_only\",\n", - " \"unique_id_column_name\": \"id\",\n", - " \"retain_matching_columns\": False,\n", - " \"retain_intermediate_calculation_columns\": False,\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " \"\"\"\n", - " (l.company_name = r.company_name)\n", - " and (\n", - " l.company_name <> ''\n", - " and r.company_name <> ''\n", - " )\n", - " \"\"\",\n", - " \"\"\"\n", - " (l.postcode = r.postcode)\n", - " and (\n", - " l.postcode <> ''\n", - " and r.postcode <> ''\n", - " )\n", - " \"\"\",\n", - " ],\n", - " \"comparisons\": [\n", - " cl.jaro_winkler_at_thresholds(\n", - " \"company_name\", [0.9, 0.6], term_frequency_adjustments=True\n", - " ),\n", - " ctl.postcode_comparison(\"postcode\"),\n", - " ],\n", - "}\n", - "train_pipeline={\n", - " \"estimate_probability_two_random_records_match\": {\n", - " \"function\": \"estimate_probability_two_random_records_match\",\n", - " \"arguments\": {\n", - " \"deterministic_matching_rules\": \"\"\"\n", - " l.company_name = r.company_name\n", - " \"\"\",\n", - " \"recall\": 0.7,\n", - " },\n", - " },\n", - " \"estimate_u_using_random_sampling\": {\n", - " \"function\": \"estimate_u_using_random_sampling\",\n", - " \"arguments\": {\"max_pairs\": 1e6},\n", - " },\n", - " \"estimate_parameters_using_expectation_maximisation\": {\n", - " \"function\": \"estimate_parameters_using_expectation_maximisation\",\n", - " \"arguments\": {\n", - " \"blocking_rule\": \"\"\"\n", - " l.company_name = r.company_name\n", - " \"\"\"\n", - " },\n", - " },\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "e4becfd6-ea66-4556-ad64-7f59d881dc2f", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:src.link.linker:Running pipeline\n", - "INFO:src.link.linker:Logging as MLflow experiment\n", - "DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): mlflow--data-science.data.trade.gov.uk:8004\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"GET /api/2.0/mlflow/experiments/get-by-name?experiment_name=cm_hmrc-trade-exporters HTTP/1.1\" 200 245\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"GET /api/2.0/mlflow/experiments/get-by-name?experiment_name=cm_hmrc-trade-exporters HTTP/1.1\" 200 245\n", - "DEBUG:git.util:Failed checking if running in CYGWIN due to: FileNotFoundError(2, 'No such file or directory')\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/create HTTP/1.1\" 200 1015\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/log-batch HTTP/1.1\" 200 2\n", - "INFO:src.link.linker:Running prepare() function\n", - "/opt/conda/envs/company_matching/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1846: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n", - "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n", - " return np.find_common_type(types, []) # type: ignore[arg-type]\n", - "INFO:splink.linker:Probability two random records match is estimated to be 2.49e-07.\n", - "This means that amongst all possible pairwise record comparisons, one in 4,010,823.56 are expected to match. With 6,778,291,809 total possible comparisons, we expect a total of around 1,690.00 matching pairs\n", - "INFO:splink.estimate_u:----- Estimating u probabilities using random sampling -----\n", - "INFO:splink.estimate_u:\n", - "Estimated u probabilities using random sampling\n", - "INFO:splink.settings:\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - company_name (no m values are trained).\n", - " - postcode (no m values are trained).\n", - "INFO:splink.em_training_session:\n", - "----- Starting EM training session -----\n", - "\n", - "INFO:splink.em_training_session:Estimating the m probabilities of the model by blocking on:\n", - "\n", - " l.company_name = r.company_name\n", - " \n", - "\n", - "Parameter estimates will be made for the following comparison(s):\n", - " - postcode\n", - "\n", - "Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: \n", - " - company_name\n", - "INFO:splink.expectation_maximisation:\n", - "INFO:splink.expectation_maximisation:Iteration 1: Largest change in params was 0.364 in probability_two_random_records_match\n", - "INFO:splink.expectation_maximisation:Iteration 2: Largest change in params was 0.136 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "INFO:splink.expectation_maximisation:Iteration 3: Largest change in params was 0.0212 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "INFO:splink.expectation_maximisation:Iteration 4: Largest change in params was 0.000885 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "INFO:splink.expectation_maximisation:Iteration 5: Largest change in params was 3.34e-05 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "INFO:splink.expectation_maximisation:\n", - "EM converged after 5 iterations\n", - "INFO:splink.settings:\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - company_name (no m values are trained).\n", - "INFO:src.link.linker:Running link() function\n", - "WARNING:splink.linker:\n", - " -- WARNING --\n", - "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. To produce predictions the following untrained trained parameters will use default values.\n", - "Comparison: 'company_name':\n", - " m values not fully trained\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"GET /api/2.0/mlflow/runs/get?run_uuid=bd60d218376e40e08cf9ffebce65e652&run_id=bd60d218376e40e08cf9ffebce65e652 HTTP/1.1\" 200 1162\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"PUT /api/2.0/mlflow-artifacts/artifacts/4/bd60d218376e40e08cf9ffebce65e652/artifacts/config/train_pipeline_o2jecl9l.json HTTP/1.1\" 200 2\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"PUT /api/2.0/mlflow-artifacts/artifacts/4/bd60d218376e40e08cf9ffebce65e652/artifacts/model/model_8c2ppgt0.json HTTP/1.1\" 200 2\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/log-parameter HTTP/1.1\" 200 2\n", - "DEBUG:urllib3.connectionpool:Resetting dropped connection: mlflow--data-science.data.trade.gov.uk\n", - "DEBUG:urllib3.connectionpool:http://mlflow--data-science.data.trade.gov.uk:8004 \"POST /api/2.0/mlflow/runs/update HTTP/1.1\" 200 421\n", - "INFO:src.link.linker:Writing parameters to /home/jovyan/company-matching/scratch/reports/cm_hmrc-trade-exporters\n", - "INFO:src.link.linker:Writing metrics to /home/jovyan/company-matching/scratch/reports/cm_hmrc-trade-exporters\n", - "INFO:src.link.linker:Writing artefacts to /home/jovyan/company-matching/scratch/reports/cm_hmrc-trade-exporters\n", - "INFO:src.link.linker:Done!\n" - ] - } - ], - "source": [ - "cl_x_exp.evaluate(\n", - " link_experiment=\"cm_hmrc-trade-exporters\",\n", - " evaluation_name=\"Basic link\",\n", - " evaluation_description=\"Simple company name clean, nothing else\",\n", - " prepare_kwargs={\n", - " \"cluster_pipeline\": cluster_pipeline,\n", - " \"dim_pipeline\": dim_pipeline,\n", - " \"linker_settings\": linker_settings,\n", - " \"train_pipeline\": train_pipeline\n", - " },\n", - " link_kwargs={\n", - " \"threshold\": 0.7\n", - " },\n", - " report_dir=Path(loc.PROJECT_DIR, 'scratch', 'reports', 'cm_hmrc-trade-exporters'),\n", - " log_mlflow=True,\n", - " log_output=False,\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/models/WL_linker-matching.ipynb b/notebooks/models/WL_linker-matching.ipynb deleted file mode 100644 index 5d31c92..0000000 --- a/notebooks/models/WL_linker-matching.ipynb +++ /dev/null @@ -1,276 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "fc07efe0-5cb8-47bb-87b4-ab6f4a475f4e", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame, display\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "markdown", - "id": "7c6625ba-1e67-45a6-8c13-a3f0b20d023d", - "metadata": {}, - "source": [ - "# 🔌Hybrid additive linker playground\n", - "\n", - "Just a place to get linkers running." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "6bb13950-4a12-4f3a-b27e-212984ec41e5", - "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'src.link.splink_linker'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 7\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdata\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclusters\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Clusters\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdata\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mvalidation\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Validation\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlink\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msplink_linker\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SplinkLinker\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m link_pipeline, stopwords\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfeatures\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclean_complex\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m clean_comp_names\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'src.link.splink_linker'" - ] - } - ], - "source": [ - "from cmf.data import utils as du\n", - "from cmf.data.star import Star\n", - "from cmf.data.datasets import Dataset\n", - "from cmf.data.probabilities import Probabilities\n", - "from cmf.data.clusters import Clusters\n", - "from cmf.data.validation import Validation\n", - "from cmf.link.splink_linker import SplinkLinker\n", - "from cmf.config import link_pipeline, stopwords\n", - "from cmf.features.clean_complex import clean_comp_names\n", - "\n", - "import splink.duckdb.comparison_library as cl\n", - "import splink.duckdb.comparison_template_library as ctl\n", - "\n", - "import uuid\n", - "from dotenv import load_dotenv, find_dotenv\n", - "import os\n", - "\n", - "dotenv_path = find_dotenv()\n", - "load_dotenv(dotenv_path)" - ] - }, - { - "cell_type": "markdown", - "id": "420e71d7-752e-4df2-a474-1288f6f69812", - "metadata": {}, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fa975078-979e-4a98-bbea-1df8a21b57d8", - "metadata": {}, - "outputs": [], - "source": [ - "star = Star(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"STAR_TABLE\")\n", - ")\n", - "probabilities = Probabilities(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"PROBABILITIES_TABLE\"),\n", - " star = star\n", - ")\n", - "clusters = Clusters(\n", - " schema = os.getenv(\"SCHEMA\"),\n", - " table = os.getenv(\"CLUSTERS_TABLE\"),\n", - " star = star\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "73ca90a5-3940-4225-ba87-cf8cbe90eda3", - "metadata": {}, - "outputs": [], - "source": [ - "# probabilities.create(overwrite=True)\n", - "# clusters.create(dim=1970, overwrite=False)" - ] - }, - { - "cell_type": "markdown", - "id": "eadc61f0-b869-49ac-bc3c-1d74f969198c", - "metadata": {}, - "source": [ - "## Splink\n", - "\n", - "TODO:\n", - "\n", - "* Write the `linker.link` method\n", - "* Run it\n", - "* Load it into clusters\n", - "* Add `cluster_select`s to the `link_pipeline` in config\n", - "\n", - "At that point I think we've got enough infra to MR the whole of this, linker, data and all. Not a pretty MR, a lot to chew, sorry reviewer." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "835f621d-f0b6-4b42-a305-264560c40dab", - "metadata": {}, - "outputs": [], - "source": [ - "# '\"hmrc\".\"trade__exporters\"': {\n", - "# \"fact\": '\"hmrc\".\"trade__exporters\"',\n", - "# \"key_fields\": [\"company_name\", \"address\", \"postcode\"],\n", - "# \"dim\": f'\"{os.getenv(\"SCHEMA\")}\".\"hmrc_trade__exporters__dim\"',\n", - "# \"n\": 3,\n", - "# \"experiment\": \"cm_hmrc-trade-exporters\",\n", - "# }" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b504bec4-4c95-441a-8629-7a5fcc1f58cf", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp = SplinkLinker(\n", - " dataset = Dataset(\n", - " star_id=54717,\n", - " star=star\n", - " ), \n", - " probabilities=probabilities, \n", - " clusters=clusters, \n", - " n=2\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "98e870da-cfe1-48d3-810b-d3f924620f58", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp.get_data(\n", - " cluster_select={\n", - " '\"companieshouse\".\"companies\"': [\n", - " \"company_name\",\n", - " \"postcode\"\n", - " ]\n", - " },\n", - " dim_select=[\n", - " \"company_name\",\n", - " \"postcode\"\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "efafdbcd-7091-40f8-b2e3-6dfe7072fd02", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp._create_linker(\n", - " linker_settings={\n", - " \"link_type\": \"link_only\",\n", - " \"unique_id_column_name\": \"id\",\n", - " \"retain_matching_columns\": False,\n", - " \"retain_intermediate_calculation_columns\": False,\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " \"\"\"\n", - " (l.name_unusual_tokens = r.name_unusual_tokens)\n", - " and (\n", - " l.name_unusual_tokens <> ''\n", - " and r.name_unusual_tokens <> ''\n", - " )\n", - " \"\"\",\n", - " \"\"\"\n", - " (l.postcode = r.postcode)\n", - " and (\n", - " l.postcode <> ''\n", - " and r.postcode <> ''\n", - " )\n", - " \"\"\",\n", - " ],\n", - " \"comparisons\": [\n", - " cl.jaro_winkler_at_thresholds(\n", - " \"name_unusual_tokens\", [0.9, 0.6], term_frequency_adjustments=True\n", - " ),\n", - " ctl.postcode_comparison(\"postcode\"),\n", - " ],\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "340d1232-6b50-458f-9b53-711ae4a6ae85", - "metadata": {}, - "outputs": [], - "source": [ - "cl_x_exp._train_linker(\n", - " train_pipeline={\n", - " \"estimate_probability_two_random_records_match\": {\n", - " \"function\": \"estimate_probability_two_random_records_match\",\n", - " \"arguments\": {\n", - " \"deterministic_matching_rules\": \"\"\"\n", - " l.name_unusual_tokens = r.name_unusual_tokens\n", - " \"\"\",\n", - " \"recall\": 0.7,\n", - " },\n", - " },\n", - " \"estimate_u_using_random_sampling\": {\n", - " \"function\": \"estimate_u_using_random_sampling\",\n", - " \"arguments\": {\"max_pairs\": 1e6},\n", - " },\n", - " \"estimate_parameters_using_expectation_maximisation\": {\n", - " \"function\": \"estimate_parameters_using_expectation_maximisation\",\n", - " \"arguments\": {\n", - " \"blocking_rule\": \"\"\"\n", - " l.name_unusual_tokens = r.name_unusual_tokens\n", - " \"\"\"\n", - " },\n", - " },\n", - " }\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/models/WL_live-matching.ipynb b/notebooks/models/WL_live-matching.ipynb deleted file mode 100644 index a5d8a44..0000000 --- a/notebooks/models/WL_live-matching.ipynb +++ /dev/null @@ -1,440 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 4, - "id": "faa1b4bb-7287-4a92-82a6-6f25dccb6953", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "d09cc18b-6c64-4ce1-8849-463f09c929f1", - "metadata": {}, - "outputs": [], - "source": [ - "import connectorx as cx\n", - "import os\n", - "import pandas as pd\n", - "\n", - "import cmf.data.utils as du" - ] - }, - { - "cell_type": "markdown", - "id": "f7ee79a5-e210-4aaa-b0f9-7602a02c70bf", - "metadata": {}, - "source": [ - "# Deployed lookup testing\n", - "\n", - "Using this to both refine lookup speed and boilerplate code for achieving our target tasks:\n", - "\n", - "* Joining multiple tables with duplicates in the source and targets\n", - "* Joining multiple tables with duplicates in the source, but the best match in the target\n", - "* Finding duplicates in the source\n", - "\n", - "It's worth noting the below is just for raw data extraction. Especially when duplicates are involved, you'd expect the below to become aggregation queries, which will slow stuff up." - ] - }, - { - "cell_type": "markdown", - "id": "a110a777-c452-4a1f-ae9c-9ca2d7b0a1df", - "metadata": {}, - "source": [ - "## With dupes in both source and target\n", - "\n", - "Joining data from three medium-sized tables.\n", - "\n", - "V1 index is two multicolumn b-tree indices on `source`/`target` and `source_id`/`target_id`.\n", - "\n", - "| `return_type` | lib | index on lookup | time (seconds) |\n", - "| --- | --- | --- | --- |\n", - "| None | pgAdmin | no | 53 |\n", - "| `arrow` | `connectorx` | no | 52 |\n", - "| `pandas` | `connectorx` | no | 115 |\n", - "| `pandas` | `pandas=1.3.5` | no | 72 |\n", - "| None | pgAdmin | v1 | 15 |\n", - "| `arrow` | `connectorx` | v1 | 10 |\n", - "| `pandas` | `connectorx` | v1 | 15 |\n", - "| `pandas` | `pandas=1.3.5` | v1 | 17 |" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "19730bc7-ca7f-4798-83df-691d66d1355c", - "metadata": {}, - "outputs": [], - "source": [ - "sql = \"\"\"\n", - " select\n", - " ch.id,\n", - " ch.company_name as ch_name,\n", - " dh.name as dh_name,\n", - " ew.company_name as ew_name\n", - " from (\n", - " select \n", - " *\n", - " from\n", - " _user_eaf4fd9a.lookup lookup\n", - " where\n", - " lookup.source = 'companieshouse_companies'\n", - " and lookup.target in (\n", - " 'dit_data_hub__companies',\n", - " 'dit_export_wins__wins_dataset'\n", - " )\n", - " ) lookup\n", - " right outer join companieshouse.companies ch on\n", - " lookup.source_id = ch.id::text\n", - " and lookup.source = 'companieshouse_companies'\n", - " left join dit.data_hub__companies dh on\n", - " lookup.target_id = dh.id::text\n", - " and lookup.target = 'dit_data_hub__companies'\n", - " left join dit.export_wins__wins_dataset ew on\n", - " lookup.target_id = ew.id::text\n", - " and lookup.target = 'dit_export_wins__wins_dataset' \n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "18b9430f-2136-40da-a845-2823fffbd474", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 5.07 s, sys: 375 ms, total: 5.44 s\n", - "Wall time: 10.6 s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "df = cx.read_sql(\n", - " conn = f\"postgres://{os.environ['PGUSER']}:{os.environ['PGPASSWORD']}@{os.environ['PGHOST']}:{os.environ['PGPORT']}/{os.environ['PGDATABASE']}\",\n", - " query = sql,\n", - " return_type = \"arrow\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "2864b6ad-7e31-4c92-90e3-5483e9c2b820", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 5.39 s, sys: 673 ms, total: 6.07 s\n", - "Wall time: 17.3 s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "df = cx.read_sql(\n", - " conn = f\"postgres://{os.environ['PGUSER']}:{os.environ['PGPASSWORD']}@{os.environ['PGHOST']}:{os.environ['PGPORT']}/{os.environ['PGDATABASE']}\",\n", - " query = sql,\n", - " return_type = \"pandas\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "c4bfd7a0-b966-458d-8fb3-e90d69d42961", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 10.7 s, sys: 1.44 s, total: 12.2 s\n", - "Wall time: 19 s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "with du.sql_engine.connect() as connection:\n", - " df = pd.read_sql(\n", - " sql, \n", - " connection\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "93ed8dd3-1692-40d7-a52c-b5d80b6b49a0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 5379033 entries, 0 to 5379032\n", - "Data columns (total 4 columns):\n", - " # Column Dtype \n", - "--- ------ ----- \n", - " 0 id object\n", - " 1 ch_name object\n", - " 2 dh_name object\n", - " 3 ew_name object\n", - "dtypes: object(4)\n", - "memory usage: 164.2+ MB\n" - ] - } - ], - "source": [ - "df.info()" - ] - }, - { - "cell_type": "markdown", - "id": "bb9a0c2a-dd8f-448c-afd2-398199044eac", - "metadata": {}, - "source": [ - "## Without dupes in target, with dupes in source\n", - "\n", - "Top result only from targets, ignores duplicates in source. Same data as above. No non-indexed benchmark, sorry.\n", - "\n", - "When using a deduped source, this will result in unique source to top result in target.\n", - "\n", - "V1 index is two multicolumn b-tree indices on `source`/`target` and `source_id`/`target_id`.\n", - "\n", - "| `return_type` | lib | index on lookup | time (seconds) |\n", - "| --- | --- | --- | --- |\n", - "| None | pgAdmin | v1 | 27 |\n", - "| `arrow` | `connectorx` | v1 | 12 |\n", - "| `pandas` | `connectorx` | v1 | 22 |\n", - "| `pandas` | `pandas=1.3.5` | v1 | 21 |" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "4af06827-74a4-4b2c-b045-670d2b15ee33", - "metadata": {}, - "outputs": [], - "source": [ - "sql = \"\"\"\n", - " select\n", - " ch.id,\n", - " ch.company_name as ch_name,\n", - " dh.name as dh_name,\n", - " ew.company_name as ew_name\n", - " from (\n", - " select\n", - " companieshouse_companies,\n", - " max(dit_data_hub__companies) as dit_data_hub__companies,\n", - " max(dit_export_wins__wins_dataset) as dit_export_wins__wins_dataset\n", - " from crosstab(\n", - " 'select distinct on (target, target_id)\n", - " source_id,\n", - " target,\n", - " target_id\n", - " from (\n", - " select distinct on (source_id, target)\n", - " source_id,\n", - " target,\n", - " target_id,\n", - " match_probability\n", - " from\n", - " \"_user_eaf4fd9a\".\"lookup\" lookup\n", - " where\n", - " lookup.source = ''companieshouse_companies''\n", - " and lookup.target in (\n", - " ''dit_data_hub__companies'',\n", - " ''dit_export_wins__wins_dataset''\n", - " )\n", - " order by\n", - " source_id,\n", - " target,\n", - " target_id,\n", - " match_probability desc\n", - " ) lookup\n", - " order by\n", - " target, \n", - " target_id,\n", - " match_probability',\n", - " $$ values\n", - " ('dit_data_hub__companies'::text), \n", - " ('dit_export_wins__wins_dataset'::text)\n", - " $$ \n", - " ) as ct (\n", - " \"companieshouse_companies\" text,\n", - " \"dit_data_hub__companies\" text,\n", - " \"dit_export_wins__wins_dataset\" text\n", - " )\n", - " group by\n", - " companieshouse_companies\n", - " ) lookup\n", - " right outer join companieshouse.companies ch on\n", - " lookup.companieshouse_companies = ch.id::text\n", - " left join dit.data_hub__companies dh on\n", - " lookup.dit_data_hub__companies = dh.id::text\n", - " left join dit.export_wins__wins_dataset ew on\n", - " lookup.dit_export_wins__wins_dataset = ew.id::text\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "c84c38c1-95a2-41ee-9661-2fa8617cf006", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 3.61 s, sys: 192 ms, total: 3.81 s\n", - "Wall time: 12.4 s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "df = cx.read_sql(\n", - " conn = f\"postgres://{os.environ['PGUSER']}:{os.environ['PGPASSWORD']}@{os.environ['PGHOST']}:{os.environ['PGPORT']}/{os.environ['PGDATABASE']}\",\n", - " query = sql,\n", - " return_type = \"arrow\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "e73ed80c-5d33-445d-9039-8cbe9fe9572d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 4.07 s, sys: 579 ms, total: 4.65 s\n", - "Wall time: 22.4 s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "df = cx.read_sql(\n", - " conn = f\"postgres://{os.environ['PGUSER']}:{os.environ['PGPASSWORD']}@{os.environ['PGHOST']}:{os.environ['PGPORT']}/{os.environ['PGDATABASE']}\",\n", - " query = sql,\n", - " return_type = \"pandas\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "bd575c52-f361-47d3-9baa-e44340483199", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 9.6 s, sys: 1.02 s, total: 10.6 s\n", - "Wall time: 21 s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "with du.sql_engine.connect() as connection:\n", - " df = pd.read_sql(\n", - " sql, \n", - " connection\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "6d44bbae-fd20-4939-ad1a-2ef53fba2eec", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 5359637 entries, 0 to 5359636\n", - "Data columns (total 4 columns):\n", - " # Column Dtype \n", - "--- ------ ----- \n", - " 0 id object\n", - " 1 ch_name object\n", - " 2 dh_name object\n", - " 3 ew_name object\n", - "dtypes: object(4)\n", - "memory usage: 163.6+ MB\n" - ] - } - ], - "source": [ - "df.info()" - ] - }, - { - "cell_type": "markdown", - "id": "4f3f6b4b-c8dc-4093-a6f4-361c323bd5cd", - "metadata": {}, - "source": [ - "## Finding duplicates in source\n", - "\n", - "V1 index is two multicolumn b-tree indices on `source`/`target` and `source_id`/`target_id`.\n", - "\n", - "| `return_type` | lib | index on lookup | time (seconds) |\n", - "| --- | --- | --- | --- |\n", - "| None | pgAdmin | v1 | x |\n", - "| `arrow` | `connectorx` | v1 | x |\n", - "| `pandas` | `connectorx` | v1 | x |\n", - "| `pandas` | `pandas=1.3.5` | v1 | x |" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/models/splink/WL_SplinkEG.ipynb b/notebooks/models/splink/WL_SplinkEG.ipynb deleted file mode 100644 index 9b5951a..0000000 --- a/notebooks/models/splink/WL_SplinkEG.ipynb +++ /dev/null @@ -1,2027 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 9, - "id": "6378293c-5aa1-4bdc-bd8c-88c41748257f", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "ad802486-6241-4122-a567-8c690c3ab65f", - "metadata": {}, - "outputs": [], - "source": [ - "import duckdb\n", - "\n", - "from splink.duckdb.linker import DuckDBLinker\n", - "import splink.duckdb.comparison_library as cl\n", - "import splink.duckdb.comparison_template_library as ctl\n", - "\n", - "import altair as alt\n", - "alt.renderers.enable(\"mimetype\")\n", - "\n", - "from cmf.data import utils as du\n", - "from cmf.features.clean_basic import (\n", - " clean_company_name,\n", - " clean_stopwords,\n", - " list_join_to_string\n", - ")\n", - "import cmf.locations as loc\n", - "from cmf.config import stopwords" - ] - }, - { - "cell_type": "markdown", - "id": "3fa7ab19-b193-4245-8223-e9bbb503d49f", - "metadata": {}, - "source": [ - "# Splink example\n", - "\n", - "Linking 5% samples from Companies House and HMRC Exporters to show a Splink workflow." - ] - }, - { - "cell_type": "markdown", - "id": "da55262c-f283-4891-9ac6-ed433e843870", - "metadata": {}, - "source": [ - "## Get data\n", - "\n", - "We can get data directly from Companies House, but HMRC Exporters contains duplicated entities. Splink requires indepdendence of observations. I therefore am reading in a very roughly deduped version of the dataset." - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "accd8ba9-8286-4578-b724-d9ecf642135f", - "metadata": {}, - "outputs": [], - "source": [ - "ch_raw = du.query(\n", - "f\"\"\"\n", - "select\n", - " id,\n", - " company_name,\n", - " postcode\n", - "from\n", - " companieshouse.companies tablesample system (5);\n", - "\"\"\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "a640883f-45b6-44ce-9b8a-61997eb9e80b", - "metadata": {}, - "outputs": [], - "source": [ - "exp_raw = du.query(\n", - "f\"\"\"\n", - "select\n", - " id,\n", - " company_name,\n", - " postcode\n", - "from\n", - " _user_eaf4fd9a.hmrc_trade__exporters__dim tablesample system (5);\n", - "\"\"\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "bd9b1c40-b13a-4af0-9658-3fcfd3c1b362", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 272681 entries, 0 to 272680\n", - "Data columns (total 3 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 272681 non-null object\n", - " 1 company_name 272681 non-null object\n", - " 2 postcode 272681 non-null object\n", - "dtypes: object(3)\n", - "memory usage: 6.2+ MB\n", - "\n", - "RangeIndex: 12739 entries, 0 to 12738\n", - "Data columns (total 3 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 12739 non-null int64 \n", - " 1 company_name 12739 non-null object\n", - " 2 postcode 12739 non-null object\n", - "dtypes: int64(1), object(2)\n", - "memory usage: 298.7+ KB\n" - ] - } - ], - "source": [ - "ch_raw.info()\n", - "exp_raw.info()" - ] - }, - { - "cell_type": "markdown", - "id": "236d8bf7-d0e0-46a7-911d-ac4701545d36", - "metadata": {}, - "source": [ - "## Clean data\n", - "\n", - "duckDB allows you to use SQL on pandas dataframes. I'm using it because it means we can use the same cleaning functions on the remote Postgres database as we use to clean stuff in-memory here. It's efficient.\n", - "\n", - "I'm also using simple cleaning functions to be explicit -- we have more complex ones available.\n", - "\n", - "Adding more functions to `src.features.clean_basic` and `src.features.clean_complex` is a core part of the task." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "6f5992cf-3cb0-447a-a6cc-e1a6315265cd", - "metadata": {}, - "outputs": [], - "source": [ - "ch_clean_step_1 = duckdb.sql(\n", - "f\"\"\"\n", - " select\n", - " id,\n", - " {clean_company_name(\"company_name\")} as company_name,\n", - " postcode\n", - " from\n", - " ch_raw\n", - "\"\"\"\n", - ").df()\n", - "ch_clean_step_2 = duckdb.sql(\n", - "f\"\"\"\n", - " select\n", - " id,\n", - " {clean_stopwords(\"company_name\", stopwords=stopwords)} as company_name,\n", - " postcode\n", - " from\n", - " ch_clean_step_1\n", - "\"\"\"\n", - ").df()\n", - "ch_clean = duckdb.sql(\n", - "f\"\"\"\n", - " select\n", - " id,\n", - " {list_join_to_string(\"company_name\")} as company_name,\n", - " postcode\n", - " from\n", - " ch_clean_step_2\n", - "\"\"\"\n", - ").df()" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "7e52f064-573a-49b4-8778-80efdc02bb33", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namepostcode
20745013127527roj madeSW6 1NY
2122203376471arnold green pension scheme trusteesEC3M 5JE
27108709198508yvonne school wearEN4 8RQ
\n", - "
" - ], - "text/plain": [ - " id company_name postcode\n", - "207450 13127527 roj made SW6 1NY\n", - "21222 03376471 arnold green pension scheme trustees EC3M 5JE\n", - "271087 09198508 yvonne school wear EN4 8RQ" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ch_clean.sample(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "a992614a-5843-4f29-aae4-3239c5e849b4", - "metadata": {}, - "outputs": [], - "source": [ - "exp_clean_step_1 = duckdb.sql(\n", - "f\"\"\"\n", - " select\n", - " id,\n", - " {clean_company_name(\"company_name\")} as company_name,\n", - " postcode\n", - " from\n", - " exp_raw\n", - "\"\"\"\n", - ").df()\n", - "exp_clean_step_2 = duckdb.sql(\n", - "f\"\"\"\n", - " select\n", - " id,\n", - " {clean_stopwords(\"company_name\", stopwords=stopwords)} as company_name,\n", - " postcode\n", - " from\n", - " exp_clean_step_1\n", - "\"\"\"\n", - ").df()\n", - "exp_clean = duckdb.sql(\n", - "f\"\"\"\n", - " select\n", - " id,\n", - " {list_join_to_string(\"company_name\")} as company_name,\n", - " postcode\n", - " from\n", - " exp_clean_step_2\n", - "\"\"\"\n", - ").df()" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "8b19e6d3-771e-4527-a50c-59f5712563c9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompany_namepostcode
3596567601eye emporiumE3 5LH
111331907344tecnicas reunidasSW1Y 4LB
119802476184van rees north americaAB10 1ZP
\n", - "
" - ], - "text/plain": [ - " id company_name postcode\n", - "3596 567601 eye emporium E3 5LH\n", - "11133 1907344 tecnicas reunidas SW1Y 4LB\n", - "11980 2476184 van rees north america AB10 1ZP" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "exp_clean.sample(3)" - ] - }, - { - "cell_type": "markdown", - "id": "60c4ae37-350b-4b10-af0b-4330637f9ef1", - "metadata": {}, - "source": [ - "## Set linker up\n", - "\n", - "Here you can see the levels we discussed in the \"comparisons\" part of the settings dictionary. `cl` and `ctl` are the Comparisons Library and Comparisons Template Library, tools Splink provides so you can make some basic matches right away.\n", - "\n", - "We'd look to do something more bespoke for addresses.\n", - "\n", - "Blocking rules will become extremely important. Strictly speaking we want to calculate the probability that every record matches every other record between tables, but this is exponentially expensive to compute. Blocking rules help Splink compare only things that stand a chance of being the same. Here the name OR postcode must match for us to bother comparing." - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "bdd616ae-8fa7-4bb1-8a6b-639304c31b89", - "metadata": {}, - "outputs": [], - "source": [ - "settings = {\n", - " \"link_type\": \"link_only\",\n", - " \"unique_id_column_name\": \"id\",\n", - " \"retain_matching_columns\": False,\n", - " \"retain_intermediate_calculation_columns\": False,\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " \"\"\"\n", - " (l.company_name = r.company_name)\n", - " and (\n", - " l.company_name <> ''\n", - " and r.company_name <> ''\n", - " )\n", - " \"\"\",\n", - " \"\"\"\n", - " (l.postcode = r.postcode)\n", - " and (\n", - " l.postcode <> ''\n", - " and r.postcode <> ''\n", - " )\n", - " \"\"\",\n", - " ],\n", - " \"comparisons\": [\n", - " cl.jaro_winkler_at_thresholds(\n", - " \"company_name\", [0.9, 0.6], term_frequency_adjustments=True\n", - " ),\n", - " ctl.postcode_comparison(\"postcode\"),\n", - " ],\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "b255b3e0-79c4-419d-ac19-56a9748d9518", - "metadata": {}, - "outputs": [], - "source": [ - "linker = DuckDBLinker(\n", - " input_table_or_tables=[\n", - " ch_clean,\n", - " exp_clean\n", - " ],\n", - " input_table_aliases=[\n", - " \"ch\",\n", - " \"exp\"\n", - " ],\n", - " settings_dict=settings,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "64d932c5-cbeb-49b0-ac9e-4b22dc333fb5", - "metadata": {}, - "source": [ - "## Train linker\n", - "\n", - "Here's where we do some of the stuff I didn't cover -- estimating probabilities. You'd be free to play with this methodology a little, but I'd expect you'd land on something common to all the different cleaning methods you tried, so it'd be a one time thing." - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "924b6da7-5006-4320-ac7f-98eb0ecb542a", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Probability two random records match is estimated to be 2.28e-07.\n", - "This means that amongst all possible pairwise record comparisons, one in 4,381,222.13 are expected to match. With 3,473,683,259 total possible comparisons, we expect a total of around 792.86 matching pairs\n" - ] - } - ], - "source": [ - "linker.estimate_probability_two_random_records_match(\n", - " deterministic_matching_rules=\"\"\"\n", - " l.company_name = r.company_name\n", - " \"\"\",\n", - " recall=0.7\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "4ebc7570-b1a1-4815-840b-a790bfe2a0c3", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "----- Estimating u probabilities using random sampling -----\n", - "u probability not trained for company_name - Exact match (comparison vector value: 3). This usually means the comparison level was never observed in the training data.\n", - "u probability not trained for company_name - Jaro_winkler_similarity >= 0.9 (comparison vector value: 2). This usually means the comparison level was never observed in the training data.\n", - "\n", - "Estimated u probabilities using random sampling\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - company_name (some u values are not trained, no m values are trained).\n", - " - postcode (no m values are trained).\n" - ] - } - ], - "source": [ - "linker.estimate_u_using_random_sampling(\n", - " max_pairs=1e5\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "038470cd-ab7d-4ec5-a089-b1ff658965bf", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "----- Starting EM training session -----\n", - "\n", - "Estimating the m probabilities of the model by blocking on:\n", - "\n", - " l.company_name = r.company_name\n", - " \n", - "\n", - "Parameter estimates will be made for the following comparison(s):\n", - " - postcode\n", - "\n", - "Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: \n", - " - company_name\n", - "\n", - "Iteration 1: Largest change in params was 0.281 in probability_two_random_records_match\n", - "Iteration 2: Largest change in params was 0.166 in probability_two_random_records_match\n", - "Iteration 3: Largest change in params was -0.0605 in the m_probability of postcode, level `Exact match postcode`\n", - "Iteration 4: Largest change in params was 0.131 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "Iteration 5: Largest change in params was 0.066 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "Iteration 6: Largest change in params was 0.00455 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "Iteration 7: Largest change in params was 0.000219 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "Iteration 8: Largest change in params was 1.03e-05 in the m_probability of postcode, level `Exact match Postcode Area`\n", - "\n", - "EM converged after 8 iterations\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - company_name (some u values are not trained, no m values are trained).\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.estimate_parameters_using_expectation_maximisation(\n", - " blocking_rule=\"\"\"\n", - " l.company_name = r.company_name\n", - " \"\"\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "285485e4-b087-4612-a0d8-8a77cc5af30b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "----- Starting EM training session -----\n", - "\n", - "Estimating the m probabilities of the model by blocking on:\n", - "\n", - " l.postcode = r.postcode\n", - " \n", - "\n", - "Parameter estimates will be made for the following comparison(s):\n", - " - company_name\n", - "\n", - "Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: \n", - " - postcode\n", - "\n", - "Iteration 1: Largest change in params was 0.0497 in the m_probability of company_name, level `Exact match`\n", - "Iteration 2: Largest change in params was 0.000752 in probability_two_random_records_match\n", - "Iteration 3: Largest change in params was 0.000241 in probability_two_random_records_match\n", - "Iteration 4: Largest change in params was 2.74e-06 in probability_two_random_records_match\n", - "\n", - "EM converged after 4 iterations\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - company_name (some u values are not trained).\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.estimate_parameters_using_expectation_maximisation(\n", - " blocking_rule=\"\"\"\n", - " l.postcode = r.postcode\n", - " \"\"\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "8c2c3fd3-8b07-4bac-ae5d-f71ec603a2ab", - "metadata": {}, - "source": [ - "## Predict links\n", - "\n", - "We've fitted the model. Let's calculate the probabilities records are linked, and keep any over 0.7. Note we expect awful performance as this is a tiny sample." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "627250a2-8884-4259-a2a6-73aab837c374", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - " -- WARNING --\n", - "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. To produce predictions the following untrained trained parameters will use default values.\n", - "Comparison: 'company_name':\n", - " u values not fully trained\n" - ] - } - ], - "source": [ - "predictions_duckdb = linker.predict(threshold_match_probability=0.7)\n", - "predictions = predictions_duckdb.as_pandas_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "28ff657a-f1d6-400b-bf71-53ffe8c78167", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
match_weightmatch_probabilitysource_dataset_lsource_dataset_rid_lid_rmatch_key
07.2412910.993434chexp1079624011891960
13.9043850.937398chexp1276852531025060
27.8262530.995613chexp1184753722870410
37.2412910.993434chexp0306953528006870
47.2412910.993434chexp1079624019247380
57.8262530.995613chexp0348886212777940
67.8262530.995613chexp0649827926674820
76.5043250.989105chexp0712747633977810
86.5043250.989105chexp0712747629479200
92.5824570.856930chexp071274766315660
\n", - "
" - ], - "text/plain": [ - " match_weight match_probability source_dataset_l source_dataset_r \\\n", - "0 7.241291 0.993434 ch exp \n", - "1 3.904385 0.937398 ch exp \n", - "2 7.826253 0.995613 ch exp \n", - "3 7.241291 0.993434 ch exp \n", - "4 7.241291 0.993434 ch exp \n", - "5 7.826253 0.995613 ch exp \n", - "6 7.826253 0.995613 ch exp \n", - "7 6.504325 0.989105 ch exp \n", - "8 6.504325 0.989105 ch exp \n", - "9 2.582457 0.856930 ch exp \n", - "\n", - " id_l id_r match_key \n", - "0 10796240 1189196 0 \n", - "1 12768525 3102506 0 \n", - "2 11847537 2287041 0 \n", - "3 03069535 2800687 0 \n", - "4 10796240 1924738 0 \n", - "5 03488862 1277794 0 \n", - "6 06498279 2667482 0 \n", - "7 07127476 3397781 0 \n", - "8 07127476 2947920 0 \n", - "9 07127476 631566 0 " - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "predictions.head(10)" - ] - }, - { - "cell_type": "markdown", - "id": "20f36994-0e0c-42c7-8ac5-198ca17cce35", - "metadata": {}, - "source": [ - "## Evaluate\n", - "\n", - "Here's where you'd need to do some manual labelling to assess a methodology. [Splink has a clerical labelling tool in the works](https://github.com/moj-analytical-services/splink/pull/1208) but it's not deployed yet. You'd have to do this by hand.\n", - "\n", - "We can also peek into the linker to see various stats about its configuration." - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "551d846c-4efb-4074-b89c-cb6916037741", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.vegalite.v5+json": { - "$schema": "https://vega.github.io/schema/vega-lite/v5.9.3.json", - "config": { - "header": { - "title": null - }, - "mark": { - "tooltip": null - }, - "title": { - "anchor": "middle" - }, - "view": { - "continuousHeight": 300, - "continuousWidth": 300, - "discreteHeight": 60, - "discreteWidth": 400 - } - }, - "data": { - "name": "data-5d1c50219449e2e0255b7ddaa903074f" - }, - "datasets": { - "data-5d1c50219449e2e0255b7ddaa903074f": [ - { - "bayes_factor": 2.2824686786571017e-07, - "bayes_factor_description": "The probability that two random records drawn at random match is 0.000 or one in 4,381,222.1 records.This is equivalent to a starting match weight of -22.063.", - "comparison_name": "probability_two_random_records_match", - "comparison_sort_order": -1, - "comparison_vector_value": 0, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "", - "log2_bayes_factor": -22.062901601354117, - "m_probability": null, - "m_probability_description": null, - "max_comparison_vector_value": 0, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": null, - "tf_adjustment_column": null, - "tf_adjustment_weight": null, - "u_probability": null, - "u_probability_description": null - }, - { - "bayes_factor": 1077.8881005286016, - "bayes_factor_description": "If comparison level is `exact match` then comparison is 1,077.89 times more likely to be a match", - "comparison_name": "company_name", - "comparison_sort_order": 0, - "comparison_vector_value": 3, - "has_tf_adjustments": true, - "is_null_level": false, - "label_for_charts": "Exact match", - "log2_bayes_factor": 10.073991699123706, - "m_probability": 0.9999938432638393, - "m_probability_description": "Amongst matching record comparisons, 100.00% of records are in the exact match comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "\"company_name_l\" = \"company_name_r\"", - "tf_adjustment_column": "company_name", - "tf_adjustment_weight": 1, - "u_probability": 0.000927734375, - "u_probability_description": "Amongst non-matching record comparisons, 0.09% of records are in the exact match comparison level" - }, - { - "bayes_factor": 5.003662767779489e-07, - "bayes_factor_description": "If comparison level is `jaro_winkler_similarity >= 0.9` then comparison is 1,998,535.97 times less likely to be a match", - "comparison_name": "company_name", - "comparison_sort_order": 0, - "comparison_vector_value": 2, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Jaro_winkler_similarity >= 0.9", - "log2_bayes_factor": -20.93051210485307, - "m_probability": 1.0424297432873944e-09, - "m_probability_description": "Amongst matching record comparisons, 0.00% of records are in the jaro_winkler_similarity >= 0.9 comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "jaro_winkler_similarity(\"company_name_l\", \"company_name_r\") >= 0.9", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.002083333333333335, - "u_probability_description": "Amongst non-matching record comparisons, 0.21% of records are in the jaro_winkler_similarity >= 0.9 comparison level" - }, - { - "bayes_factor": 2.398603557108488e-05, - "bayes_factor_description": "If comparison level is `jaro_winkler_similarity >= 0.6` then comparison is 41,690.92 times less likely to be a match", - "comparison_name": "company_name", - "comparison_sort_order": 0, - "comparison_vector_value": 1, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Jaro_winkler_similarity >= 0.6", - "log2_bayes_factor": -15.347445746758282, - "m_probability": 2.2644199317030986e-06, - "m_probability_description": "Amongst matching record comparisons, 0.00% of records are in the jaro_winkler_similarity >= 0.6 comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "jaro_winkler_similarity(\"company_name_l\", \"company_name_r\") >= 0.6", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.09440576059316999, - "u_probability_description": "Amongst non-matching record comparisons, 9.44% of records are in the jaro_winkler_similarity >= 0.6 comparison level" - }, - { - "bayes_factor": 4.296928613203078e-06, - "bayes_factor_description": "If comparison level is `all other comparisons` then comparison is 232,724.37 times less likely to be a match", - "comparison_name": "company_name", - "comparison_sort_order": 0, - "comparison_vector_value": 0, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "All other comparisons", - "log2_bayes_factor": -17.828262760151212, - "m_probability": 3.891273799259086e-06, - "m_probability_description": "Amongst matching record comparisons, 0.00% of records are in the all other comparisons comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "ELSE", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.90559423940683, - "u_probability_description": "Amongst non-matching record comparisons, 90.56% of records are in the all other comparisons comparison level" - }, - { - "bayes_factor": 6967.79868693931, - "bayes_factor_description": "If comparison level is `exact match postcode` then comparison is 6,967.80 times more likely to be a match", - "comparison_name": "postcode", - "comparison_sort_order": 1, - "comparison_vector_value": 4, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Exact match postcode", - "log2_bayes_factor": 12.766487226975748, - "m_probability": 0.6623540185783227, - "m_probability_description": "Amongst matching record comparisons, 66.24% of records are in the exact match postcode comparison level", - "max_comparison_vector_value": 4, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "lower(\"postcode_l\") = lower(\"postcode_r\")", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 9.50592932341548e-05, - "u_probability_description": "Amongst non-matching record comparisons, 0.01% of records are in the exact match postcode comparison level" - }, - { - "bayes_factor": 459.722336375772, - "bayes_factor_description": "If comparison level is `exact match postcode sector` then comparison is 459.72 times more likely to be a match", - "comparison_name": "postcode", - "comparison_sort_order": 1, - "comparison_vector_value": 3, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Exact match Postcode Sector", - "log2_bayes_factor": 8.844618953374955, - "m_probability": 0.0655513205697529, - "m_probability_description": "Amongst matching record comparisons, 6.56% of records are in the exact match postcode sector comparison level", - "max_comparison_vector_value": 4, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "\n regexp_extract(lower(\"postcode_l\"), '^[A-Za-z]{1,2}[0-9][A-Za-z0-9]? [0-9]')\n = \n regexp_extract(lower(\"postcode_r\"), '^[A-Za-z]{1,2}[0-9][A-Za-z0-9]? [0-9]')\n ", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.0001425889398512322, - "u_probability_description": "Amongst non-matching record comparisons, 0.01% of records are in the exact match postcode sector comparison level" - }, - { - "bayes_factor": 108.80454006003016, - "bayes_factor_description": "If comparison level is `exact match postcode district` then comparison is 108.80 times more likely to be a match", - "comparison_name": "postcode", - "comparison_sort_order": 1, - "comparison_vector_value": 2, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Exact match Postcode District", - "log2_bayes_factor": 6.765594946596759, - "m_probability": 0.05042155305902204, - "m_probability_description": "Amongst matching record comparisons, 5.04% of records are in the exact match postcode district comparison level", - "max_comparison_vector_value": 4, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "\n regexp_extract(lower(\"postcode_l\"), '^[A-Za-z]{1,2}[0-9][A-Za-z0-9]?')\n = \n regexp_extract(lower(\"postcode_r\"), '^[A-Za-z]{1,2}[0-9][A-Za-z0-9]?')\n ", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.0004634140545165047, - "u_probability_description": "Amongst non-matching record comparisons, 0.05% of records are in the exact match postcode district comparison level" - }, - { - "bayes_factor": 17.98989670573241, - "bayes_factor_description": "If comparison level is `exact match postcode area` then comparison is 17.99 times more likely to be a match", - "comparison_name": "postcode", - "comparison_sort_order": 1, - "comparison_vector_value": 1, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Exact match Postcode Area", - "log2_bayes_factor": 4.169114997843882, - "m_probability": 0.22167260253148252, - "m_probability_description": "Amongst matching record comparisons, 22.17% of records are in the exact match postcode area comparison level", - "max_comparison_vector_value": 4, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "\n regexp_extract(lower(\"postcode_l\"), '^[A-Za-z]{1,2}')\n = \n regexp_extract(lower(\"postcode_r\"), '^[A-Za-z]{1,2}')\n ", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.012322060885477316, - "u_probability_description": "Amongst non-matching record comparisons, 1.23% of records are in the exact match postcode area comparison level" - }, - { - "bayes_factor": 5.119283255544672e-07, - "bayes_factor_description": "If comparison level is `all other comparisons` then comparison is 1,953,398.45 times less likely to be a match", - "comparison_name": "postcode", - "comparison_sort_order": 1, - "comparison_vector_value": 0, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "All other comparisons", - "log2_bayes_factor": -20.89755482977835, - "m_probability": 5.052614199149832e-07, - "m_probability_description": "Amongst matching record comparisons, 0.00% of records are in the all other comparisons comparison level", - "max_comparison_vector_value": 4, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "ELSE", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.9869768768269208, - "u_probability_description": "Amongst non-matching record comparisons, 98.70% of records are in the all other comparisons comparison level" - } - ] - }, - "params": [ - { - "bind": "scales", - "name": "mouse_zoom", - "select": { - "encodings": [ - "x" - ], - "type": "interval" - }, - "views": [] - } - ], - "resolve": { - "axis": { - "y": "independent" - }, - "scale": { - "y": "independent" - } - }, - "title": { - "subtitle": "Use mousewheel to zoom", - "text": "Model parameters (components of final match weight)" - }, - "vconcat": [ - { - "encoding": { - "color": { - "field": "log2_bayes_factor", - "scale": { - "domain": [ - -10, - 0, - 10 - ], - "range": [ - "red", - "orange", - "green" - ] - }, - "title": "Match weight", - "type": "quantitative" - }, - "tooltip": [ - { - "field": "comparison_name", - "title": "Comparison name", - "type": "nominal" - }, - { - "field": "probability_two_random_records_match", - "format": ".4f", - "title": "Probability two random records match", - "type": "nominal" - }, - { - "field": "log2_bayes_factor", - "format": ",.4f", - "title": "Equivalent match weight", - "type": "quantitative" - }, - { - "field": "bayes_factor_description", - "title": "Match weight description", - "type": "nominal" - } - ], - "x": { - "axis": { - "domain": false, - "labels": false, - "ticks": false, - "title": "" - }, - "field": "log2_bayes_factor", - "scale": { - "domain": [ - -10, - 10 - ] - }, - "type": "quantitative" - }, - "y": { - "axis": { - "title": "Prior (starting) match weight", - "titleAlign": "right", - "titleAngle": 0, - "titleFontWeight": "normal" - }, - "field": "label_for_charts", - "sort": { - "field": "comparison_vector_value", - "order": "descending" - }, - "type": "nominal" - } - }, - "height": 20, - "mark": { - "clip": true, - "height": 15, - "type": "bar" - }, - "transform": [ - { - "filter": "(datum.comparison_name == 'probability_two_random_records_match')" - } - ] - }, - { - "encoding": { - "color": { - "field": "log2_bayes_factor", - "scale": { - "domain": [ - -10, - 0, - 10 - ], - "range": [ - "red", - "orange", - "green" - ] - }, - "title": "Match weight", - "type": "quantitative" - }, - "row": { - "field": "comparison_name", - "header": { - "labelAlign": "left", - "labelAnchor": "middle", - "labelAngle": 0 - }, - "sort": { - "field": "comparison_sort_order" - }, - "type": "nominal" - }, - "tooltip": [ - { - "field": "comparison_name", - "title": "Comparison name", - "type": "nominal" - }, - { - "field": "label_for_charts", - "title": "Label", - "type": "ordinal" - }, - { - "field": "sql_condition", - "title": "SQL condition", - "type": "nominal" - }, - { - "field": "m_probability", - "format": ".4f", - "title": "M probability", - "type": "quantitative" - }, - { - "field": "u_probability", - "format": ".4f", - "title": "U probability", - "type": "quantitative" - }, - { - "field": "bayes_factor", - "format": ",.4f", - "title": "Bayes factor = m/u", - "type": "quantitative" - }, - { - "field": "log2_bayes_factor", - "format": ",.4f", - "title": "Match weight = log2(m/u)", - "type": "quantitative" - }, - { - "field": "bayes_factor_description", - "title": "Match weight description", - "type": "nominal" - } - ], - "x": { - "axis": { - "title": "Comparison level match weight = log2(m/u)" - }, - "field": "log2_bayes_factor", - "scale": { - "domain": [ - -10, - 10 - ] - }, - "type": "quantitative" - }, - "y": { - "axis": { - "title": null - }, - "field": "label_for_charts", - "sort": { - "field": "comparison_vector_value", - "order": "descending" - }, - "type": "nominal" - } - }, - "height": { - "step": 12 - }, - "mark": { - "clip": true, - "type": "bar" - }, - "resolve": { - "axis": { - "y": "independent" - }, - "scale": { - "y": "independent" - } - }, - "transform": [ - { - "filter": "(datum.comparison_name != 'probability_two_random_records_match')" - } - ] - } - ] - }, - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAm4AAAD4CAYAAABVCGpkAAAAAXNSR0IArs4c6QAAIABJREFUeF7snQnYVdP3x3cyFCKSMZWxiQxRpmQMSQOhkDmKRjKLZChUVESJSCGZZxkqJPEmRaRQyFAqw0/hZ+j/fJbffv/nPZ1z7z33nHvOHdZ+np7qfc/Zw3fve87nrrX3WpWaNGmyxmhRBVQBVUAVUAVUAVVAFch7BSopuOX9HGkHVQFVQBVQBVQBVUAVEAUU3HQhqAKqgCqgCqgCqoAqUCAKKLgVyERpN1UBVUAVUAVUAVVAFVBw0zWgCqgCqoAqoAqoAqpAgSig4FYgE5Xv3VxnnXWki//880+Frvr9PBfjibOtXPQ/n+rcc889zW+//WY+/fTTfOpWUfdlyy23lPEtW7bMc5z29z/99JOpXr26Wb16tfn1119Da8Lnxv25DV1pAVaQTv8wQ9prr73MqlWrzIIFC8JUo/eqAqKAgpsuhNAKPPLII2bnnXeWet5++23Ts2dP+feIESPM/vvvL/+eN2+eOeOMMwK3deedd5pmzZqZV1991Vx++eUp73/++efNVlttZQYNGmQee+yxwG3l6w3XXHONWbNmjbn++utj6WKrVq3MgAEDzOTJk82VV14ZS5v53shhhx1mDj74YDN16lT5E2UBGJ566imz/vrrm2+++ca0bdu2QvX8/LXXXjNVq1Y1y5cvN0888YQ577zzBKpPPfXUUF1hTR1zzDFm5syZ5sILLwxVV6qb69ata84880yzaNEi88ADD6Rt57333jOVKlUy++yzT9pro7qgrKxMqmKegeJU5aabbjItW7Y0U6ZMMZdccslal7rXC3O2/fbbmxNPPNEsXrw4qi5rPSWqgIJbiU58lMN2gtvPP/9sDj/8cKmel82mm24q//7444/N6aefHrjZkSNHmqZNm5rXX3/dXHrppRmB2y233GIeffTRwG3l6w32hRLHS6xKlSqiNbDAi2nlypX5Kkus/RoyZIhp0aKFrOnLLrss0rYBprPOOkssnPfee6+5//77K9TfqVMnc/HFF5v//ve/Zty4cWK16dq1q3nnnXfM0KFDQ/XlhhtuMEcffbR59913zQUXXBCqrlQ32y8DgCftpStxrnnbl4cffljWPXqjdarCMwY4e+ONN8xFF1201qXu9XLEEUfIF8pvv/3WtGnTJt3w9feqQEoFFNx0gYRWwIIbViG+JfONlcJDzf7MghsPRh56wBj//v333wUU+vfvL+6ahg0bmsGDB5saNWqIGwj3wrbbblsObnvvvbfhZbPFFluYv/76Syx5vPh40FqLmxe4YbUCKPkmTxvUv3TpUnkhLly40Gy00UbyEtx9991N5cqVBVheeeUV+RkvbCxQtLXZZpuZWrVqmebNmwtItm7d2gA79JXfUx/94gXPt3a+XeMm+eOPP8wdd9xhjj32WNOgQQO55tZbbzXPPPOMwVXFmNANTX755Rf53csvv2yeffZZs80224ie9OmUU04xtWvX9tVg/Pjx8s0eLWjrhRdeMBMnThRrZePGjWV+eHnceOON5v33319r7i1EfPbZZ6Zjx47ye17o/Jtx/vDDD2batGkyh5QDDjhA5g5dmGtezOjy0UcfyYutX79+osEmm2xitttuO/Pjjz9K36+44gpTs2ZNg9uvR48eAiOsI8bKejjkkEOkPerp3bu3rIN0awfo2XHHHc2MGTPE0rvBBhuY+fPniwWY+Um1dlLde/LJJ5tzzz3XrLfeeubPP/80L730klg/Gcd+++1nNt54Y9GF/j/44IOenycsZKeddppYzf7++29Zc4z7hBNOKK+bNQGwdO/evbwO5pv/M3bumzNnjnnuuedEk1mzZom1J924d9ppJ/lMoS1z9P3335tRo0bJOFKBW9D582uH9cba33DDDaV9xs64+FxdddVV4vZlfubOnWuuvvpq+bcFtzfffFPmkvveeuuttaxb9erVM6NHj5Z5AY74PKIJXyABpJNOOkmeD7TJHNJuly5d5PPOswct7VrmM7vuuuuaI488Up4naIMGfD7Riv7y86OOOkru4Xes7c0331zWAGuZtd+kSZO11st1111nGAvzTz8++OCD0M9draB0FVBwK925j2zkFtzYm4Pbh4cU5dprr5X9OvzMghsvDB5sFB6u1iKHWw4AwA3FS4oHNSAHRFF4mQ8cOFAeoPzsP//5jzwEedB+8cUX8oBOBW533313uduFhy9tUIArIAHAOO644wSo6DOwSOGFW6dOHXnB2MILlBcnrmD6+d1335mtt95aHvC4RHioYw2h8Hv+2P13/Ixx8X/q4aXECwLrFtehCS8yClYVQHCXXXaR/3/11Vfy7R4Q89PAaeXkHvrDfjWAhhco7dLXFStWyAvIXbD27LbbbgZXEPcCG4yVgubVqlWTfzOPgOXjjz8uMIim1M18MD7Gw4vUurcZq51LpwZ2XMcff7wAIS9UinOOeKGfffbZ0qbf2sGlS38AcgovZcCPgpUKoEq1dlLdC4wCr9SHVezpp58WuOAFDFAy/2wVYNxYk9xWSmDBWmWAcjREM9YZ/erWrZtADWNm/Tvd0+3btxf90YXfsxUByGVd4nbE9Zaq78OHD5cvB6xn1jq6ABr0lS9PqcCtQ4cOgebPrx22SAwbNqy8XaCF9TVp0iTRAQ0ZP/8G1HGpWnCzn1F+T+EZwJqzhTXHZ42/ATKs+nypojAXXM8XpxdffFHWF1YvCnPElw3axNKGhczpKgX6+vbtu9Za4jO67777loObXWt8SaAu5nfMmDFrrRfA2bpXeV7yfy2qQLYKKLhlq5zeV66ABTfgyroP+CUWJPszwI2XrwUaQIgXDi8g/vBiB3iwxPBS4cHJw5Vv2TyUqQfwACR40Pfq1UteRljC7EOavTPscfOyuFlws66KRo0ale+1waIB3GCJw8oG1PHNGdC4/fbbpT0Lbli0cN8yNvrIi4Zv3TzkeSFixeIlb8cJkACDvNQovKR58QJYFL7dYxXDmgM0MV6AF6sZFpXzzz+//IWCq5SXeCoNgDpgGCgAAHhJ0heACisa8AK4AQGMgxe5s2Al5SXZuXNn88knn8gLD8sYfcRqie5Y8qZPny7toIOFQNoAPAAcXsqff/65vPhpAzhm/plr9ADsqAcwBSiox4IbWjGvtEU/WA9Y9lKtHfZBWoCx+yzpL3PEnLN+Uuk2YcIEgT6ve6nD7friZQ9QA9p2blkvfHlwH+jgSwlrw+6HwmIKFFPQBEDClejnrsQ1C6B9+OGH4lK1nxk3uPn1HVhhTgFYdLRbFvisss79XKUW3DKdv1TtHHTQQTKn1lUKULH2gV6+MFnLLWsJV6WFKO5BX2t5RjcAyFns84e/GZM9ZMBzgM8boIzOfB4AbD6zfK4ZH2O3688JbnxJwDJu1yLgDYC7wQ0rG19QDjzwQIFTC8RervVzzjmn/PNPXVpUgWwVUHDLVjm9by1w48XAA4lvnRTcBzz4eEACbjzYuYbCt30ecsCXffGxQbtdu3blD3Ou44WKO4QXL0DAA96rYJ3C0pcO3OyDmDqs64KHLK4u3F8AlLPwMMa6ArgBcLhLKPSbsVjrmL1n9uzZ8nC2kGH3pbn37NjN17h2cNd4FQuZznuxoKTS4OabbxagYkzABYUXFgBpCy8f+oerzl1sW7xUcWsCA4CsBW3n9cDZDjvsUGH/IfAMFFOPPVBiwQ4LHy5auw+SlywvV6xYWEksuAHS9A+LCCBNAVZwifmtHcDvySefFPhiHrGK8bIGonGJA5GpdKNffvcCmF57lgAIpyWV+QKKeJk7iwVn5gGIowDorGfqBRCiADevcdN34Jn5d/aVPrB1ALdiOnDLdP5StQPYOMGNL0D169f33dxv1yFzxpcMu64AOGvRtxpbIMIdiruWLQtseQCEec5wP/W4rdH2fgtbTnBj7bLucbPzGcBizb5ZN7jhludzZJ9j9vde4MahEyz7zueI5wdff6gKpFFAwU2XSGgF7DdeQAawwbVIwfKAlcCCG0DDy5li93ngCgIKeODxYsbCZB+0XGetKIAbL0ReQIChdd/xDRoXBZYOXCjpwM3CEA9lHsgUHs64LuzpPR66nErFSuEEN6d7ERcXL9yvv/5awIj+4MYLCm68NLFm8VLlWz6n+wAW9vDhGgXwnOBmIcxPA3QA3CxoAc/AMD/DIsQLzJ4AtoDkXAC87OiL/R0WDixEd911l+ylwiUNDAAeuKB4MdJPC4b25ciYcHHyMs/0xW/Bzb6c7YZ21gYWu1RrB/eVXSu43IFhJ7hhIUy1dgA9wM3rXi9woy7miELbe+yxh+jm/GJgdcXShtWH9YmliTmxJ1OxKgK0UYCbV9/5zFnrHuua9WUP7kQJbpxuTdWOG9zsOrauUazdWBaBbKyA7i86qcCNL0+Alt1PyxYGYI6tFDwbqIsvdoA9lmys0HwBwr3J2gaksEQ7wY1nGSdhH3roIdnnateSG9zs4YRMwM1ay/mSyEleLapAtgoouGWrnN5XroAT3Hgp2Zc4Dz/gx4Ib1ghekGxSB87YOI51BqDgAc7DnN/zsMVNyoZjQIwCuGF9Ax4oAAbuJyAEVw4WF1746cCNe3EB8lDmwU4bwKa16GERoc+4dii8BIAkQMgJbnYvGO4qHviMkX6zeRz4zNTihssQlyXjwGXDi5W2sfwxVoDJvlCwPqEPG739NOAaJ7gBE0AWUAoEYpXgoIHdEwRoOgsvQF6E7NPBvWzdcuhEP4AUXKJYTwldgT4U4I0+24MUuLuAmWzADQsIcwQY00/rEky1dtgXlQrcWFup1g4u4VTghiUT0MFaiJsNUGHO+CIAhGE55QAGUGQ3u1tdsbJgbeGljzUI6w1rDEsjaw/gyhW4WWCh7fvuu09c1MALBasqruooLG5Y2gEjv3ZYN1gocZPzOca6zvUU1jzgBtzasCRBwI06rDuaf+Papi32alJwmfNFwul650sAh1X4rPAcwo3uBDe+oPDlhfHwLLJ7JzMFN/d64dlkt2sAjnxx1aIKZKuAglu2yul9a4Eb34qBAwsWvNywqAE1No4b30yBHqDLFl5+bGDGNQfc4XoAgig8NLnWhmEAJHjoWbcPAMg3dOAkVRw3+9AEjuw3cfbV8VLloc/JM17+tMvP2YgPwLDPjIc+1znBDRco4wRiKOzVAVqoHxdgpuDGy5M20c5CD/XRLtYSIMYZbgU9+bbup4G1eDldm2jLC9zpBsZN3KdPn7VWsd0XZvdLAeK4XJ19Y2z25CPWSvah2fnixYaVElcYJybZs2h1Axp4oVpXKVY0LJ3uPW6AMhBEYR7oP2CYbu1YcLMvaqfFDatZqrWT7l6+GNBX1h0vek79AnD2sAZ9xZJiD7g4hWWNsP7YR2kLXzZY54AzLsRU4Mb6BiTsIQ024fPlwL3HzW/cABsniq2euGhZC1iSgGO/OG5B5y9VO4CadT9ai7p1J1pN0A8A5DngBjf7RcnLosn9Nh6drduejubzw8lfG2DYxoW0bbLnjs89J23t9gXmGsDkCxVfPihoDXBbt6o7HAj7RrHy2t+71wtgaJ9PXtsO9HWiCgRRQMEtiFp6bWQK8K2fzd3sfeOh6SycoONFw4sKN4a7YD3CLclLnfszifpuwY0XCC8s9s1hHXPeixWEb/4cMMikTl7ifGvHWuje5J+NUGjCy4F+ufdJseGaly2ASN+y0QB3FXWgmV90fuAIyxbjsafzGAsuJSxMgAYvVmehL+jAyw5rAn9nU6yrFMhcsmSJwJtXpPlUayddu9noZuvkXl7QgCd/KKwXwsNw8IPTzakKXwSAN76oxB1Bn3kFwnPdbqp2+LzwBQDrrV1/bAsAjtAOOI+j0CbPF9pkq4NXAZTZL8cXGL7k4HoFlvkiyReQTIpzveCWBdwIC2St+ZnUodeoAl4KKLjpuigJBZzgRggELf4KWEsIgMsen7iKE9yc4SDial/bUQWsAlj+bQYYZ2iZbLOJ8FnCChf3Z0pntDgVUHArznnVUbkU4PQcbkkOFfgFSVXR/lUAyyN76zhc4g69kEuNxo4dK/sfbcDUXLaldasC6RRgTxzWMfbNYgHH8oaLPJvCF0e2BHhlWcimPr2ntBVQcCvt+dfRqwKqgCqgCqgCqkABKaDgVkCTpV1VBVQBVUAVUAVUgdJWQMGttOdfR68KFLUC9tRvtgcmilocHZwqoAoUpAIKbgU5bdppVWBtBQiZwAlHUvA4Cxv9CdVAeINSK/fcc4/E4HJmjgijAeEkONVKrDBn4ZQiYWAIEaJFFVAFVIFcKqDglkt1tW5VIEYFADeggkCxbnBj4z/QUWolanADftGYeG3OQggT4oERM0yLKqAKqAK5VEDBLZfqat2qQIwKZAJupPohij0xtQjySyBd4sIR+Z+TtwQnJq4eJzvdMa64hhAJBCMlHhnx3LDkUQdBXbHskRuUQvBgAvMSN47YVwT7tflqic9mryPCPG1SL5ZCriNmHflhyXZALkhOuQJM5LbE5UmcOU73ETSXf/M7gtTSPwLZci8py8hoQIwwUrARO6xmzZqSBYNE7YzNr17676eHF7ihBacGccvadEZ+43cuB4IT019nIdwEwWT97qcN5pA4hgQ9JhMG1zIuUk4R54+gumhK4GgssPSP+GGkXCJGoRZVQBUobAUU3Ap7/rT3qkC5AunAjQCnZIAgofquu+4q0fhJ2A1QkaoJCCANEVkZCBhKOiZnAcQIkQCckFrLps4iFyfl0EMPlcwXZLrAZUiwV7IukCEAsCNTAqAHuFnXJX0GMk4++WRD3keCopLTk2CnBDAl2CnXAG3URXBm7iXW3IknnihZLviblEIEd2VswA8x4UjnRMR6cqoSzJnAxkTmB1gBR796bT5PLz28wI20SbRL3labRNxv/M7AzlwPRFPQZocddpAUYp9//rmvfqTHItMBGTKAMMZIAGgCxKIfMIoWwBsBkwlgTeYNxg2Qo5UWVUAVKGwFFNwKe/6096pAxuAGjPECJwgoFiliVGF5ApLIkrD//vuL9Q13H+m8gCkgwhYLbljGiGuFhYdE6wQWxYJH/lhSAZH7Emjg5xQS22MlwpXYt29fX3AjlRYWI6xnZCYAbLAaUS/9JT8qhbRHAAkARlom+s01pDvib6CNtEKMY9SoUZIlw/YFWGPPG7lD/eolB6ufHoBSOlcpcOo3fiDLXci+QMozwBJoTnU/8fUAYkCNYkGuWbNmAm6k4yK1F7CKm9gGfCVdF1Y64FqLKqAKFLYCCm6FPX/ae1WgXAGsPKSespDCL7BakbYH6CHdF9YsgtySU9S62YAbQMNdgBQgyAluWM8AKgqWHEAPGKSQ6/G2226TBOyAHX9TsLC9/vrr5uqrrxYrndPiRsJx3H9AItYxXKZAIAnYx40bJ+17ZW9YunSpXDtx4kRJIg7AkCuStF5YEHGPYpkDXnC9AmoU9vmRyoh7/eollZqfHvQzHbiR2shv/C+99FIFmbGQ8TNcwAAv8Jnqftyo7Fe0B03QDBcouXMBNyyhXINFlfFhVcWKSSBlQN25NvSjowqoAoWpgIJbYc6b9loVWEsBLC38wcIFAFAAGZK+8wInVyWWKqCI/wNcQAMgx4vebrgH9rA4vfjiixLt3Qlu7GfDumPBDVelBTQLbgACbj9cehSbTB7XK+CEa7F9+/byO/apATkkhKevJCLnelyZ5Efl9CZANmbMGAFFCm5AQIf2gFL22gGQAAv1Y62j77hT3YcTLLh16tRJIMerXg4a+OmB5SoduAFVfuN3594FroBEgNbmOk11P1pgmWR/HAUgYz44kAK4Me6BAweWg1uHDh3M4sWLFdz0eaEKFJECCm5FNJk6lNJWYLfddpN9WySCx93Ini9clAAbbkM29GORw+XJfif2prGfa968ebIPDTcnIMffXAeAWQBEWe7LBNxWrFhhyAfLwQNcnDaVFfvVAA3CZgBHpBJibxjuWMCNwwSjR48W2KIte2CBPVtYwXAPAjkAGcCHpQ23IqE46DcWPdyfWOyspckP3LBuATpe9dIfPz2wanqBW5cuXQx/0Ax3pN/4nSsUtzHARgo2a4kDOjlQ4Xc/c8hcog0pyQA1AJa6FNxK+/Ovoy8dBRTcSmeudaQloAAnDIEgThxSsEzhSsNKs+OOO8oG/8qVK4urFMsblif2dI0fP14OAFCwXgEobldipuCGZYx9aFtvvbW0w58BAwaI5Qy4vPfee6UPWOtI4A1E4oLEmoW1jfbpP3AGjAF6WJG4h7J8+XLTrl07ude6CgFV3LG0y+EIAIriBW7ogDUwVb1+evjFcWMfHQDGmLAc+o3fuQTpL25jZ8HqBsz53Y+bl3awilKYwzPOOEOsdV7ghruYQykAM/1yHzgpgY+EDlEVKDoFFNyKbkp1QKWuABYnLGZYb9jH5iycyiQ8BLDkPHjANXXr1pW9YWz6J6RG2IK7kYMA1Oe03NE/QAeLkfOUJe3htuV3b731VoV7ABX2ceFWZQ9bVCVVvUH1oC5ADLCk+I0/07773c+eQA4fAG1o4dYw0/r1OlVAFShMBRTcCnPetNeqgCqgCqgCqoAqUIIKKLiV4KTrkFUBVUAVUAVUAVWgMBVQcCvMedNeqwKqgCqgCqgCqkAJKqDgVoKTrkNWBVQBVUAVUAVUgcJUQMGtMOdNe60KqAKqgCqgCqgCJaiAglsJTroOWRVQBVQBVUAVUAUKUwEFt8KcN+21KqAKqAKqgCqgCpSgAgpuBTbpxOHiz7Jly/Kq50STJ3+kM15XXB0kfhaBZpOKZ1WlShUJBhtFIfDt9OnTo6hK61AFVAFVIOcKTJo0KedtFFIDNqj2L7/8krNuK7iFlJa8iUQyd5Zvv/1W0vF88MEHFX5OGh0ivpNXMNtC5HtSFZEayKuceeaZEimdqOyZlmuvvVZSB6233nrm0UcflSTiQSGIVDxEyCeCfZSF6PsXXnih6devn2+1M2bMkMjwpEaKupA9gBROzJ1XIQo/mhEc1l2ymYuysjLPuqIel7u+E0880STxANZ2cz2z/9avOqvOuVAgynVF/uKrrrpKuvn4449LthSCYJM2j0JqNzLD+BWuJ0A3qfb8is3nPGLECPPAAw/kQhLJn0zml/3339+3fgKNP/3008II5557boXrttxyS9O/f3/z2muviQ5eRcEt5NRZcDv99NPNd999Z7bddltz1113ma+//ro8+bZtgt+xEJnYbAopawAk0vn4gRWL4cMPP5RUQZkW8jtecsklBgA64ogjzAsvvJDpreXXsdhIaUSOSJJaR1X23XdfQ9Jt/vYruQQ38mBiTVuwYIFn86nALZu5ePW47mb0qFFRyZdxPQ0bNTIfz5uX8fVRXajtRqVk6nrW9GqnYB6D1FGCTJDuFkO7TnBbtGiRfNkg/+/5558vUvCllhzHfoW8y6tWrZJUdn4F8CPnMe8UjBW5KOR6Jj3fxRdf7Fs9Hqonn3xS3tVnnXVWhetszmnea34GAwW3kDNnwa1169aSRojCxB1wwAGGPIEQ80cffWQaNmxoRo0aZY477jjJD8nv+Yaw4YYbCuSROBsX2fPPP2++/PJLSVkEBGE9swXLHul0+vTpI+5SEoqToggXJbBIipxTTz1VLF933HGH1MM3mM0228xgtiWhN5Y4Zxt8QHbZZRdxNbJIhg4dKvkMcdlVr15dQBNX4Jw5c+RDBKCNGTNGEoST47Jq1arm8ssvl99jsWEsF110UQVVSXTO9SQ9x6r3yiuvSKJs6iftEn0mvyR5Jfkm8scff4hu9OHNN9+UNrB6kV+ScaLNn3/+KYm5yYHJAkd76qegy913313eB+CK3JNt2rQRVy5gyrct2uMPaaHIQUndpFuibiyb/Aw9Vq5cKUm/L7jgAsmNST5K5oUE5cw1FjfSN6EjfWf8QLZzLsaNG5fRSnts1kYZXacXqQJBFJg4qJWCWxDBsry2GAAqyNCjHK8FN4wSPGOxWJHXGJABhCy44V0hpzGp33gfkouZZy5eD959s2fPNljW7HWk2MOY0L17d4FBwA2vGO8yCs9vnvO28E7Ho8U7jXuwfGFBAwh53tMv6t94442lbZ77pJ+jPfIrk2eY/h577LHyHuDnvMOANN4vvOd5PwNuWAg32GAD+TNv3jzp26uvviqp89CAdz7XuouCW5BV6nGtBTcmHziqVauWWK0w6wJmTCJgddtttxmuZXGyAJhg9qkNGjRIwAfo4D5yNLLQWDi46Zx7xnCR8jNgg3tatmxpTjjhBCF2XLBHH320PJyBiCuvvFIAbf78+WJ9u/76683OO+8s1zjbAGKwlNEPFurEiRPFVQrokND6sssuk+TkQEvbtm3lAwKEXnHFFfJNCEsYi5c6qYN73d946DNt4+6kDq6hXT4QACsfAtyKgCdwiEWRdpo3by4QCTTxISA/I6BGu1gvr7vuOqkPyMMqhtUQLbgOMHYW4A4wW716tWgHMJLYnG9pjKVJkyYC1fyOhwLQTV+oE2hEh2nTpkkdQCJj4ec9e/YsBzesoWjAvPLN0DkXme6BU3AL+YHU2z0VUHCLZ2FECTJBelwM7VpwA6p4vvMMBlzIucz/ATfr4uRneIqOPPJI89lnn8k7rFevXmKA4As3OXx5hwI/QBvvMowAeE6AI37O+453BVDG+8L5ruW9wBd43id8iafgVaNujC3NmjWTLUsYULiW9yCwxjuZLTv8G8ADwjBQwAAYJzBcYMjh/QG4UebOnWvq169v2KvNFiven7yDgFKg74033lBwC/JhyORaC26IzGLA4oIlCKvPFltsIeCGhQ2rDhYtFmfHjh0FXFq1aiUveRYCkwNIYAnihT9s2LAKzQNzLBAgCWLHwocfnEU7depU88gjjwi9O91ztE/bfCPYY489BAiBISDL2YZ1lWIts3vcADcsStZMzTXAIAsLS9gTTzwh3wqw4FlwO+OMMwTwWNTOArixYHm40G/21LGo+ZC3qgdhAAAgAElEQVS98847ZvDgwaITFkZckyxiPqh8CPjAWVcpVjQW99lnny3VY7Vj/HxQnnrqKdnbQL18uN17ztCVDyZ/gGTqp6/UST0vvviifFD5tkahbercfffdBdAYPx9iC6V8C8IaacGNPvEBtGMDkLNxlSq4ZfKp02uCKqDgFlSx7K4vBoAKMvIox2vBjS/07APHSwEs8T7leQq4sYcarwfAxf5nnuV8GT/44IPl/WhdpRgE+NLPM55nN8/nX3/91dSoUUPAjfcv7zLgCbcl9fMOtwVY472JIYMxAne8Y3mfYgzBEDJhwgRpGxjkWiCOdxVtAW4YFWgH7wwQat/ZTnBjexUGA96tXMP99Bd+UFdpkJUY8FovV6mtAjgASCzNW3Bj8QEM9ucWgAAXFiuuRQDKWSzcYboFdixcYGkDdjARA2VOWLAHFAA1qJ/FbsHN2YYfuPEtAbcshWuw3OFuBU7sPjgsi1xDGwAp33rcmzIBN1sXsMoHBliiWHDD2ghcsv8PaxiL3w1ujI0PgfX7Y3Zm4WO9tIcTADa0dYPbKaecIlYwYBHA44PHxlC0p9+0Sx8/+eSTctknT54s1kDAjW94WArpE4X70dOCm20PjbAYKrgF/CDp5TlVQMEtp/KWVx4lyATpcTG0a8GNdybvAwrvPaCG5zvghmUNgwUeDN4dwBluSje4AVW8H6zRBJclX9p5Tjv3uPGexaNkjShWc65hPxxgxrMfwwN94Q+GC9rEU8PvMcrYwvubdyTgxlYZ6gfseP8wJjwyTnCze9zYD0df8egAoApuQVZ/FtdmA25YbQAFCB6AAmSYVBYfAOQFbnSNCWV/Ga5SyBwIARywSvHNAMLHEoQVioXOXjJ+T1vAE98s/MAN0y0L0Glx8wI3XInsbWNh494EbKzFjXFgMuYbhLNkAm4AH6Zn6gDMsN4BSXyorFWMDxIPqA4dOpiaNWvK4uZbCn1OB258G8IySQEsuad27dqy7xA90Rw3NxDH31gcccvyjQ9ww03K2AHKL774QsAV13gqcOPbHHOBuzXToha3TJXS64IooOAWRK3sry0GgAoy+ijH6wQ33mmNGjUS2OILNwcJADe2qWC5YnsM0NS5c+cKFje8XljZ2BfHewmPFu8VvFkAFO7STMCNfeG8Pym4O9mDxruTAixiCMGQgJcGjxE6sD+b98PYsWPLXaW0Xa1aNdm+xHuF91Cm4IYHjPcLf7uL7nELsko9rs0E3LAu2c3/NhwIoMKiYyMmLkx7ygVwg+jtEWhnk/wMixP7v4APruN+zLi46YAoYINvD9RnwYdrlixZIgcZ+EbAIna2gasRszOWM35u97jxIbEHDYBGDjrwTQOLGwubf/MNxJqjASkLM25ws3V5WdzYG8YeOICMDwJ1bLrppgI9uGqxHOKCBkyBITaFYmHk58AUJmW+5fBtzM/iRn+slZA+AFPowB6JH3/8UfbX4WJlXMyH/ZYEdGJC5wOENuiO3jxQOLTAw8EZDsRpcXPORaYnmBTcQn4g9XZPBRTc4lkYUYJMkB4XQ7tOcOPZimHAGiF4fuLdwVuCkQKLFu8KnsU8r3mO8/xm/zXPa77Qs9WFfWUUntfUhwvWCW7UBSS6LW4W2AA0LHy85/CUYcxgvzUFoMRty+E9CtudeF/aPeR41NirxruGgxC8Z+iP3VLDu4x/A3vch1UOixv7+jAysC0IaOQdp+AW5NOQ42uZRMy5TB6++XQFSxbmWRsOhIllcbz//vsCO7awSFj4wCKLEosR+9+AEhawuy0WPuDGptB0hUXGNwb6jCsYaLH9wSrF753uxnT1OX/Pnjw+IMApHwbcmHxjon98gBgThf0EnJjNRbBfe6oUq5qzYPnjwWL3HgLAqeK7ec1FJloouGWikl4TVAEFt6CKZXd9MQBUkJEnMV7eB+yDZq+ZOywWX+o5bcr7jsL7z1q5gsYmzVQHrHsYRmyb9j76CWiyVxwXKS5frIXsL08Va477uZf3OO91LIsKbpnORp5eh3kWM3Gm4SWiHgbfHrCMAVdszse9yjcFfs6HJFWcnaj7Emd9QDJgCjwClzwgcOna/YbZ9MUGg3Teu7hSi2yq0ntUgZQKKLjFs0CSABlGVmrtxjOb4VshnAl7t23BmMLeN/ZxhynqKg2jXgL3QuH8weqVVMHKh5WNbzy4J/kmg4sSiGMjZ7EWLKS4uonJwylUt1UuinGrxS0KFbUOtwIKbvGsiVIDqKTGG89sRtMK4Mb+OLYLcWIWl2nYouAWVkG9XxWIUAEFtwjF1KrKFVBwi2cxJAUypdZuPLOZv60ouOXv3GjPSlABBbcSnPQYhqzgFoPIJeiyTAoY45nN/G1FwS1/50Z7VoIKKLiV4KTHMGQFtxhEVnCLR2RtxSi46SJQBfJIAQW3PJqMIuqKgls8k5mUBarU2o1nNvO3FQW3/J0b7VkJKqDgVoKTHsOQFdxiEFktbqFE/qjNrFD37/ZMkwr3EwmAg2TOA3OE2SBGaBQHBEJ1NuTNCm4hBdTbVYEoFXj1uO5m9KhRUVaZUV0NGzUyH8+bl9G1UV6k7Uappn9da3q1k/hRcZdSswTpeLNfYUvahQO3Wk/9C27EcSPwLYF2CdROuCoKWXlIRUhcNACOmKMkoC/EouBWiLOmfS5aBUjr4s6zGsdg9YUTh8qlF29L15Wuq0wV+KV9OHDb5Ml/wY2wTWRRIMUU6RkBN6xvpH60GRJIIk8AdXJ/F2JRcCvEWdM+F60CCm7xTK0CheqcCwV0XYVQtcPa4LZmjTGVKqWwJDt//1hFV+nAgQMlzSPgtueee5bnvKa2W265RdIskhqrEIuCWyHOmva5aBUgH+r06dOLdnw6MFVAFSguBaJywVc5MZzF7fdJ/uB22GGHST5rck1TSD+FZ0PBrbjWoo5GFVAFVAFVQBVQBWJSoHrHcOD20yP+4Ebe69dff900bdpUMv0MGTJERnXxxRfHNLpom1GLW7R6am2qgCqgCqgCqoAqEFCBbTqFA7fvHvYHN7rCHjeA7c033zSPPfaYIY/oAw88ELCX+XG5glt+zIP2QhVQBVQBVUAVKFkF6p4aDtwWT1gb3GrXri1J3SndunWTk6SUpUuXmjZt2oj1rRCLglshzpr2WRVQBVQBVUAVKCIF6p0WDtw+HV8R3LykqV69uqlZs6ZZuHBhQSun4FbQ06edVwVUAVVAFVAFCl+BxqeHA7e549KDW+Gr9O8IFNyKZSZ1HKqAKqAKqAKqQIEqsO8Z4cDtvQcU3Ap06rXbqoAqoAqoAqqAKlBoCjQ/Kxy4vTlWwa3Q5lz7qwqoAqqAKqAKqAIFqsARZ4cDt1fvU3Ar0KnXbqsCqoAqoAqoAqpAoSlw7LnhwO35MQpuhTbn2l9VQBVQBVQBVUAVKFAFju8SDtyeuEfBrUCnXrutCqgCqoAqoAqoAoWmQKfzw4Hbw6MU3AptzrW/qoAqoAqoAqqAKlCgCpwREtweUHAr0JnXbqsCqoAqoAqoAqpAwSlwXrdwFrfRd6nFreAmXTusCqgCqoAqoAqoAoWpQI8L1ga3NQSbTTEc5+9HjFRwK8yZ116rAqqAKqAKqAKqQMEp0PfCcBa3wXcquBXcpGuHVQFVQBVQBVQBVaAwFbiqezhwu/EOBbfCnHnttSqgCqgCqoAqoAoUnAIDeoQDt2tGKLgV3KRrh1UBVUAVUAVUAVWgMBW4uWc4cLtsuIJbYc689loVUAVUAVVAFVAFCk6B23uFA7fewxTcCm7StcOqgCqgCqgCqoAqUJgK3BUS3LopuBXmxGuvVQFVQBVQBVQBVaDwFLivdziL29m3q8Wt8GZde6wKFIECt99+u5k+fXoRjESHkE8KvPv3u/nUHe1LkSjQtHJTM2nSpEhGM8ED3ILEcTtVwS2SedBKVIFQChx22GFm6623rlDHV199Zd56661Q9cZ5c8OGDc1mm22WMYyVlZWZffbZJ84uSlsnnnhiZA/gIJ3XdoOolf21s457P/ub9U5VwEeBQVUGRvbceKxPOItbh9sqWtx47v7888/mn3/+Kbr5q9SkSROgVosqkHcKvPjii6ZSpUrm66+/Lu/bnDlzzB133BGqr5dffrlZf/31zYABA0LVY29+4403DJD5119/rVVfv379zC677GJOP/30jNpScMtIptAXlRowKriFXjJagYcCUYLbsyHB7bj/gVvt2rXN/fffL8C24YYbmhdeeMHccMMNRTV/Cm5FNZ3FNRjA7aWXXjLDhg2rMDDA6+CDDzatW7c2nTt3Nqeeeqpp1aqVufrqq03Lli3lWu69/vrrBZruvvtuU61aNbNkyRJz6623Sn0A4fDhw82DDz4o1++2224GN+Xvv/9utthiC/Pmm2+aPfbYw1SvXl0++P379zcnnHCC6dmzp6lSpYr57LPPpF3ub9CggVmxYoVp27atPDB22mkn8+OPP5pLL73UHHfccebQQw816667rtlggw3Mo48+aoYMGeI7UQpu8axhBbd4dNZWiluBKMFtckhwa/k/cBsxYoTZaKONzNlnny3P4okTJ5rDDz9crG/FUhTcimUmi3AcwBews2zZsvLR3XXXXWbWrFnm1VdfFTg75phjzH333Weee+458/TTT5sePXrIN60777zTHHLIIWbo0KECWtddd50A3BNPPGH23HNPsbhdcMEFAmqUfffd11A3dfCzk08+WYBt8eLF5vzzzzfNmjUTmHvsscfkQUA9N910k7hAX3nlFYG2Fi1ayLVnnXWWueyyy0zVqlXNwoUL5XeAX+PGjQU2DzjgAAW3/ylQagCV1HjV4laED8g8GFKU4DYtJLi1+B+48WyeMWOGfElfZ511zLvvvmu6dOliZs+enQeKRdMFBbdodNRacqAAYLZq1Srz0UcfldcOOPH/U045xVx00UVm6dKl5thjj5UPKHvDsLjVr19f/gBJXN+3b1/zzjvvmB133FFAkA8xMAe42QK4jRw50hx44IGmVq1aYhkDsLju9ddfl7oxwXfo0MHsvPPOAnqjR48299xzj8FKxrX8/4svvhAX7KabbipWPFyoBx10kDnqqKPMlltuKTCYag+bWtxysJA8qkwKoJJqV8EtnnVVaq1ECW4zLwq3x63Z0H/3uPGMffnll8s9NRbc2GZTLEXBrVhmsgjH4ecqZajt27c3V111lex/499AFd+0gDq+WeFCxU35zDPPiEtzwYIF4vbE0oar1QvccKECYHXr1hVwa9q0qdlkk00E3Pg3e9kwt/Nt7sgjjzTjx4+vAG5PPfWUmTx5srhcaYeDFWeccUb5Hjc2y2Kds+B23nnnGf64ix5OyP1iTgqgkmpXwS33a6oUW4gS3MpChgPZ53+nSvGs/Prrr/KFnS0qPK/xmBTTIQUFt1L8tBXImAE3XKLsWbCFDx9QxM+nTp0qAHXbbbeJheukk06S/WTdunUz55xzjmnTpo24R9977z3Z7/bss88KOAFm7IHArem0uKUCN+oG5rCgbbfddrK3DQsbf7CS4Za9+OKLZV8c115zzTUCbB9//LEvuHlNg1rc4lmcSQFUUu0quMWzrkqtlUjBLWTKq33+l/KK5z+fM57VvXv3Nscff7zsiS6mouBWTLNZZGPB5I170VmWL18u+8YaNWpkjjjiCHPJJZeIxa1jx47mkUceMX/++adZs2aNuE6xvj355JNyaAHYw+169NFHy/41DhkAaljNKLg+U4EbVjBAEkCk/Oc//xGrHfvasMTRLnvbAMUaNWrItzvaoD17qtRtcVNw0zAkcX1kFdziUrq02okU3C4I5yrdZ+S/rlK8JOxV5ss5h9DYusKX9mIqCm7FNJslPhZgjf1nuEUBtY033tisXLlSVMFt+f3335crxId79erVniE8UskIhC1atEju23bbbc23334rbXHs/KeffvJsK8i0qMUtiFrZX5uU5SupdhXcsl8reqe/ApGC2/khwW1UxThufLn//PPPyw+gFdM8KrgV02zqWApeAQW3eKYwKYBKql0Ft3jWVam1Eim4nRsS3MZoyqtSW386XlUgLxRQcItnGpICqKTarXN8HQmjE3dp0qSJthuD6Enp3K1pt8gyJ5SdFW597jNWwS2GpaZNqAKqgFuBaef1MzfddGPswjRpso+ZNatM282xAknpvEm3EyJ7wQaRKClQ1XaDzFL210apc1nnkOD2oIJb9jOpd6ZVgBhkhK6wwV/T3hDBBfXq1ZOcmcQz8yqcimRvGHu2bCGm2XfffSexyaIsxEIjYG668ds+c8iAE0KE9khXODzAgYFMrk1XV6a/Z28d7ZItwa+QjYH9dumOpD89a/1Mm9XrVIGMFRg/qK2CW8ZqZX9hlCATpBfF0G7ZqSHBbYKCW5A1o9cGVAB3GEeUSZgeV+HkZZ06dSTGmFfh1M1DDz1kHn744fJfP/7442batGmSGirKQqYDYuzYgwN+dds+E0dt1KhRZv/990/bDYLsUjfBdSdMmGAeeOABia2Wq0LYD06OctABgCPNCtkWbKHPV155pZx0Be44mTpo0CDf7ii45WqmSrteBbd45r8YACqIUlGOt+ykkOD2qIJbkLnTawMqYMGNKP7EKNt8883FWsNLnyPMhKUgvASWLqCJEBNYywj+SuwxTsq4y6677mrGjBkj8cSIIwa48G8sVcS1IX4Z1wBN5MqkfkJtYGU77bTT5Lg04Pbaa6/J3/whIwHgRlJ3+klIjD/++MPceOONEpmaWGrk7AQIST3lVYAoIBWoIQ4aKai4j6TrJP7lNCb3E6oDXYjDBgTRJwLo0mfyf1pw88oXShBb7iO+Gm0AUlOmTDGdOnUyf//9t5k7d6754YcfBKA4aUr+UzSy1sVbbrnFfPnllxJM97///W/Gs8lp0rffflvypJKWC+glZ2n37t0rwC+A3qdPHwniS3YGYgoxRq+i4Jax/HphAAUU3AKIFeLSKEEmSDeKod2yDiHB7TEFtyBrRq8NqIAFN/JnVq5cWSxEgNE333wjkAJcffjhh5ILk9/VrFlTAshyPZkCLrzwQs8WZ86cKZAGyABZ5O3EagWs4PIkDRQWN3JyAmbz58+XGDdADLBI5H+A6dNPPzVdu3Y11uKGqxLIIuMAQW2bN28u1i8iUgOT9JNguO5CyA1AkGC4ZDYAnMhMQAoS2ybQSgYE6gfuiMtmwRAwos8333xzObh55Qvl2DcPLpujjvsJzAuM2thuQDH9BhjPPffcCgEZGbNNg0XuUcCWuSDrAZt+nYW5GzdunPyInKdA9X777Sf/BwAJRwKo2sLvCR0CzJGztF+/fpLJAY0V3AJ+cPTyrBVQcMtaukA3FgNABRlwlOMtaxsS3J5WcAsyd3ptQAUsuOEqJBgs4IE1BusVFql7771XwIgX/ltvvSUwAyQBCPzeL0k5eTnZw4YFCIsO1iBgBUsSbVhwAxItbABCQCGQt9VWW8keLOrnbwtuWL34Q18owBtQA2BeeumlYpXzKoAYv/vtt9/EygXwEEDXCW7AIwnZAUnirJF/lGwEWODQxw1uXvlCgT+sWFgInYF0cZFihWQ/Hf0gSC9jXbJkifTbXbBQAqw77bSTWDuBuIYNG1a4jCwMzA8FayaQaF24gCnA7AQ38pSS2QHrHhbO9dZbT/Kp+rmJ1eIW8MOkl2ekgIJbRjKFvihKkAnSmWJot+y4kOD2rIJbkDWj16ZRABcewIDVx7rXsL5gqWJPFkC0ww47yGZ9wMy6BQEfrGh8KNk3BVANHjzYF9xIuQQAAjfUgauRhOm4EfmdBTebIYBuE/Ufaxfgxj4s4AKIwkVrwQ34YH+WM9k7VimyEnDQwst1ayVhfMAQueIYOwcegEtrcXv++ecF1NiPBsShkx+4ca9XvlDADesXrlEvcMN9ijXu/fffF02wruE+dRf05uAE/a1WrZq54oorxKXsLFgucS9TnHlMAV00oaC1s6ApWRUYH3CHFhSvXKXfVDpAP0+qQOQKKLhFLqlnhcUAUEGUinK8ZceEBLcXFdyCzJ1em0YBIuxzyhFL2F577SWgBGBgRQOisJThagSAePk7N+JzH6AD5LHXDFjA3edVsBRNnDhRrDtAAZYmLDtYeHDRpQM3gA1gwbV51FFHiWUJSxVA2a5dO7G0UQdgRbop9pGlAjegDYsXe84AG8aLW5I8n9mAGy5Gr3yh7P/zAzdcz4yLMQFsgBhWSGfhxCfpqRgTe+LQ7c477xTXJ/PlLMTCcu5hw6rJnFkXLppxIAJABDKBP+CZcV977bXiekVHv6IWN32c5EIBBbdcqLp2nVGCTJAeF0O7ZS1DgttkBbcga0avzUABcqcBcORO4+XPIYNbb71VXHzAAi5E0jKxj43cmtb91rp1a7HSsBeOe9nzBhD4FUCCzf+cdsSSxx/2pmUCbli92BPGKU6sSuytAygBLeAHy9a6664rBx0AEqyBWLlShQsBPBkXhxqWLl0qFi2nq9TL4ga44irlwIbbVeqVL5ScpiR3ByKdFjfqAbzY58dePECTPXfo4SxY4yhAJnUFKewpxJJKYXxAGZCKS5UEx+Q0tfvcOIwCdPvtb6MOBbcg6uu1mSqg4JapUuGuKwaACqJAlOMtOzQkuE1RcJPE2LxwsMCwJ4j9QVgOvE448qLHysM9WI1wufFvTuqxh4sXP9CBBYZTe1g3PvjgA9O/f3+xBlF4IeMmY98ULite2LwAgZbtt99e9g3xAqbwosVKBUy4Cxv3sagASbwocfdhOfE6jci1hx9+uMHiApBwUhJLE32lL7zEcT2yOZ7Tj5988olYsoKcPHT2Dxcm+9acsdJwo/76668CFrjUADivWF+NGzcWVxttY70hkbmzAAhol8uCRY/9as7+0x4uUADGXegvmtJ3Tl06c4WG6adXvlC/+tCUtjmgwTplHXDAw1nof7ZzSj3Vq1cXyF24cKHvsNhjyPrROG5hZl7vzVYBBbdslQt2X5QgE6TlYmi37OCQ4PaGgpu466pUqSIWIGANUMLN43XCEQsKe7BwoRGjCmADdnA34aJiXxaFvUOcIGSDNlYPrFDsreJlxrVsDgfGcH0RzoITj7jpsDJhucH6xLUAlQ3B4F7cWJawemARYk8UIMbeMK/TiFhj2GuGdQoYYgM+/8YtCBjxkgcoeeHSJq4u/o/bMsnCpniA2lkAqlzGK0s1XqCXgwHuAiCxryzpglWTdcmXBdZVPhe1uOXz7BRu3xTc4pm7YgCoIEpFOd6y/UOC2wwFN3GxAUzsr8JShvWK04leJxwBN9x7gAP7mKy1Cpce7kDgDEgDzho0aGDGjh0rMMUJPCxuWNf4A5gBjEAWsIcLir1Q7JGibvYeAW7sHwLIvArghoWHBYUljdOMtON1GhEXHnBH/fSReukPEHfmmWeKlQ8rDXVgkcENB0hSt5bCUYAvIIA4IT7yvWjKq3hmKKnUU0m1qymv4llXUYJMkB4XQ7tlTUOC27slDm72NCObwdnrhCsIcAOcvE44Am5YqXChsceqV69essfHCW52kzibyAFArFm4PDmpOHv2bNO5c+dycLP7nvgZEAWkYUHDbYkblr1VWPT8wA1XGnuJOAnJnibu8zqNCLjZa3GbAm1Y4Cy4AXO4eZ3hLj7++OPycBBBPlh6rSqQiQIzz+trbkwkV6kmA89kfsJek1Qy8A26naYpr8JOXgb3FwNAZTDM8kuiHG/ZXiHBbXaJgxuzgmsS+MJ6RiBTNqRz6s7rhGMm4LZo0SLZkD9w4EAJfQEMYU0jVAX7o3Cp4uIkOKwXuLEXjACxFODKvc/KriQsbm5wox2v04jELUsFbowXEGUvHhv+AU42oLPvLkzRXKWll6sU9z8HTNjDmKq8Ncs/32mYNaf3lrYCwwadp+AWwxKIEmSCdLcY2i1rHBLc5iq4iZUL9ycbtznhhzsRi5TXCcdU4MbBAA4jEMYCaxkvL07bEWwWOCQ8Az/Hyof1bZtttqkAblj9aJuCuxRXqf1/phY3XKVepxGBMT9ww2LHeNkTZcNv0DaHLdyb24N8wLhWc5WWTq5SDkWQCguLNYX9n4Ql8SsKbkE/TXp9JgoouGWiUvhrigGggqgQ5XjLGoQEt08U3MrnDnei+zSg84RjJpMMcHFwYc6cOQbLGycrKcAaKYJwxwKIvORSJR7nhCJxyjg9yQb93Xfffa3m2T/3yy+/eHYryGlEZwX0jVOB9D/dqcBM9NBcpaWTqxQLMOsUqy/rHZc8VmX7GXCvFwW3TD5Bek1QBRTcgiqW3fVRgkyQHhRDu2W7hAS3hQpuQdZM2mstuHEyNdvCHjWsc+SgBJ6AR1yu7oJL1y+Bd7ZtR32f5iotnVylxIarW7euWJw54MIXC5syy2tdKbhF/WnT+lBAwS2edVAMABVEqSjHW1Y3JLgtzgzcMBDxxZmDhrbwpZo98cQOLYRSqUmTJmty3VFOd2IFCxMrixhiqVIr5XoMUdavuUr/H9yKPVcpacFw//fp00dOMPOgI7iyn+VWwS3KT5rWZRVQcItnLUQJMkF6XAztltUKCW5LUoMb+4yJ8UpKwkmTJkkmIgohwNh+hcEHgCOaBeHN8rnEAm75LEAcfdNcpaWbq5RsFHxhIVcphf2guEs5+OOVq/T3SjvFsSS1jRJTQMEtngkvBoAKolSU4y3bMiS4LUsNbhw25DmMEWj8+PECbjZ3uI0Ly37kFStWVEhrGESPuK5VcItBac1VWrq5SoE0Dvocc8wxpl69ehKnkBiGuscthg+eNlGugIJbPIshSpAJ0uNiaLds85DgtjIzVykHFYlcAbgRnowEA/vtt5/IzWFK9t0TwzWfi4JbTLOjuUpLM1cp2RpIOk/aNtyjpGCzmUS8lp66SmP6QJZYMwpu8Ux4MQBUEKWiHG/ZRiHBbdW/4MbBRb4kOwv5oQmmT3GCG9feeOON5bnBiZpBFAoFtyCroMiv1Xv8LBAAACAASURBVFyl30cyw0FOB+dLrlKsruRM9bO0WWEU3CJZIlqJSwEFt3iWRJQgE6THxdBu2bohwe2vf8GNPWvNmjWrIB/xZ4lJ6wY3Do0BdATp54v1kCFD5Br2weVzUYtbPs+OT980V2mwSSukXKUKbsHmVq/OTAEFt8x0CntVMQBUEA2iHG/Z3yHBrXJwVyljJcg+wGbzmXPqHy9JPhcFt3yeHe1bJAoUUq5STXkVyZSnrSSp1FNJtaspr9IuiUguiBJkgnSoGNot+zkkuG2aObiRvpPc6RQyN3GSlEJmJDI4RRGvNcj8Bb1WwS2oYnq9KpBDBd4+73LNVZpDfW3VSQFUUu1u2K2jpryKYV0VA0AFkSnK8ZZ9FRLcamcGbl7jIx97zZo1zcKFC4MMP7FrFdwSkz75holZ07FjRzER2zRe7B8j7t6HH34omzxJSxakJJGHNUj/Mr2WYIwtWrQIPP5M6/e77rVZf4StQu9XBdZS4O5BmmQ+jmURJcgE6W8xtFv2UUhw2y17cAuidT5cq+CWD7OQUB9OOOEEc8UVV5j3339fYopRLrvsMjkiPXDgQAldQbDYdOXyyy+XeDgDBgyQOGXt2rUz3377bbrb8vr3O+64o+nbt6/kqo2zKLjFqXbptKXgFs9cFwNABVEqqfEG6WMxXqvgVoyzmuGYSEGGT5/jzzaafzpw48TOscceKy288MIL8oe9ApUqVTLDhw83PXr0kETqderUMT/99JPBArd8+XIBwzPPPFOuw4oHGBI/Z7311pM8sBwgsJk1sPrxu2rVqpklS5aIVbBWrVryM6Jf//zzz+b88883VatWNbfffrukLsFKiOVwjz32MJi96Vf//v0NAXBpExCj/p49expCdPC7li1byjg4Hs6JI2d/unbtam644QbTunVrATiOh2OhfPTRR83QoUPl5xwdp/9k9Dj99NMleTyRuTlBSlJ5Qn8QF8jrfrW4ZbhI9bJIFFBwi0TGtJUkBTKl1m7aiSjyCxTcinyC/YYHAL388suGaNJADgAEyKUCt80339y89NJL5rbbbhOoOvfccw0RpwEcLG5Yp8gp+9VXXwmwEK9s4sSJ5p577jFvvfWWtAFsAUVsCMXaR1YJgOn+++8v7yqWPg4UXHfddfI7+sXeIPYgAFj8HNcu95CwnRh5wNvJJ58sYyFdCWDHkXDy5FJo86qrrhLQwkLIPUAmm1ABz0MOOcSMGzeuvD/z5s0zw4YNk1Qor732mjnnnHMM4VyANQLoMk7+cJQcSyNja9CggWxsZVyktyLwrt/9Cm4l+sFLaNgKbvEIX2oAldR445nN/G1FwS1/5yanPQPQsJxx7Ll9+/bmt99+kzyaqcANqxmWJuCEguUMUNp1110FtAA3XKX9+vUTKASssOjxs0suuUSupWDpApyIVv3JJ5+Yq6++usJYgTysVMTewVKG9Qr4o44ZM2bIfVi9evXqZUaOHCmQhEUOaxhQRV8AKiyJgBttAWeAFUneuYbf0Y/69evLH8bFNbY/++67r4DbQQcdZKZNmyb6AK3AHf0hBymxfyhYIbESLliwwDRu3Fh0xALI9Vzjvh8LpIJbTpe3Vu5SQMEtniWRFMiUWrvxzGb+tqLglr9zk9OeATYEg/3jjz/ElbjVVluZww8/XCxhfnvcsKztsMMO5tRTT5W+PfvsswJowJUT3OweN+LhkPcNeOvQoYPEy7GFlCOkgwJqcLHagjty5syZ0gYghNsTUOL4Ng8nrGmAF9Y8wA244v9169YVcAOUbFBFC24A5EMPPSTjAgDZ2/fEE0+Yjz76yMyePdt07ty5HNxsfyy4UTfjw2KHBQ/LIlY3om3zOwpuV+AV6APgcJtyuAFLHX1w3w9oYunzylX6c6XdczrvWnlpKqDgFs+8lxpAJTXeeGYzf1tRcMvfuclZz9gHNmbMGLFU2X1luP0mTZpkNtxwQ19wA4pwCWKdqlGjhiTqBcj69OljNtpoI3FPOg8nWHAbMWKEuDvJ1wksAk2AF3vC3OBmLXkkYcctChy+8sor4tLFAnfTTTdJjjngjHozAbdFixZJ39hXB3jS5kknnWQOPfRQAVXcoFgRqc8NbqeddppohcsT2MLiB+hhecPCOH/+fBkP4wN+3eBGO+77qZMULF5FDyfkbNmXdMUKbvFMf1IgU2rtxjOb+duKglv+zk3Oegb4sF+MPWG24Hps2LChmTJlimzwHzRokOepUixXJOHFSvfZZ5+ZTp06Cciw6R+I4m+nxQ23IO5X9qrttddessH/+++/N23bthVQnDp1qgCTs7A3DPcp1q1Vq1YJNOHWxNJVuXJlaRtXKu7LTMANyFuzZo3c27t3b+k3bt4///xTfk6fsL6hie2P0+LGtVtvvbVYJ631kL4wBvrC/jrGjAXRy+Lmdb/f5Cq45WzZl3TFCm7xTH+pAVRS441nNvO3FQW3/J2bvO0ZpyaBIBv7jY4CR6tXr06Zi5O8oRyK8LM2uQcMLAF5zsIeMqxc1lKYTiT2uI0ePdrMmTPHYHmzuUKBNQAUdyyAuPHGG5uVK1f6Vke75Bp19gfrJBY8XKTpIm173a8Wt3Szp7+PSgEFt6iUTF1PUiBTau3GM5v524qCW/7OjfYsAgUsuOHKLISiFrdCmKXC66OCWzxzVmoAldR445nN/G1FwS1/50Z7FoECnO785ZdfMrbQRdBkqCo05VUo+TK+OanUU0m1qymvMl4aoS5MCmRKrd1Qk1QENyu4FcEk6hCKRwEOf0yfPr14BqQjUQVUgaJWgL3KWuJVQMEtXr21NVVAFVAFVAFVQBVQBbJWQMEta+n0RlVAFVAFVAFVQBVQBeJVQMEtXr21NVVAFVAFVAFVQBVQBbJWQMEta+n0RlVAFVAFVAFVQBVQBeJVQMEtXr21NVVAFVAFVAFVQBVQBbJWQMEta+n0RlVAFVAFVAFVQBVQBeJVQMEtXr21NVVAFVAFVAFVQBVQBbJWQMEta+n0RlVAFVAFVAFVQBVQBeJVQMEtXr21NVVAFVAFVAFVQBVQBbJWQMEta+n0RlVAFVAFVAFVQBVQBeJVQMEtXr21NVVAFVAFVAFVQBVQBbJWQMEta+n0RlVAFVAFVAFVQBVQBeJVQMEtXr21NVVAFVAFVAFVQBVQBbJWQMEta+n0RlVAFVAFVAFVQBVQBeJVQMEtXr21NVVAFVAFVAFVQBVQBbJWQMEta+n0RlVAFVAFVAFVQBVQBeJVQMEtXr21NVVAFVAFVAFVQBVQBbJWQMEta+n0RlVAFVAFVAFVQBVQBeJVQMEtXr21NVVAFVAFVAFVQBVQBbJWQMEta+n0RlVAFVAFVAFVQBVQBeJVQMEtXr21NVVAFVAFVAFVQBVQBbJWQMEta+n0RlUgegVuv/12M3369Ogr1hpLWoF3V80v6fHr4HOjQNON6ptJkyblpnKt1VcBBbcSWhybb765Ofroo9ca8XPPPWd++eWXglHilFNOMU888YT5/fff87bPe++9t/n777/NnDlzAvWxrKzM7LPPPoHuieLiE088MZEHsLYbxeylr2PWAT+lv0ivUAUCKjBouy6JPDcCdrPoLldwK7op9R/QYYcdZm6++WbzwQcfVLjo2muvNd9++20oJd544w1D/X/99Veoerj58ssvN+uvv74ZMGCAZ13vvvuuadeuXeg++3X04IMPNhdccIHp2LFj1mPBcvbf//7XXHrppYHqUHALJFfWF5caMCq4Zb1U9MYUCii4JbM8FNyS0T2RVgGrG2+80ey///4V2t94443Ns88+a+655x7zyCOPmBdeeEEsWlOmTDEjRowwWOp+/PFHc/bZZ5tvvvlGgArL3R9//GHGjBljjjjiCNOgQQOzYsUKc9RRR5XXfeGFF5rDDz/cbLHFFmbdddc1L7/8svy+UqVK5vrrr5d2+vfvb1q2bCn3vPjii+all14yd955p1wzfPhwsQRedtllZp111jFvvfWW6du3rwHcvvzyS1OnTh3z008/GSxwy5cvL2+X67FabbPNNtLuXXfdZR544AHTunVrc+WVV5r11lvPfP755+b000+Xvt19991m6623ljGeccYZMnbA8e2335bx8/tq1aqZJUuWCMzVqlVLfrbZZpuZn3/+2Zx//vlSX/v27c0ll1xi/vnnH9Fm1qxZAqHUQX/4Gfqjg19RcIvno6HgFo/O2kpxK6Dglsz8Krglo3sirVqLG5Bhy7Jly0zPnj3N1VdfbY499ljz6quvmkMOOUSAa9SoUaZy5coCS8AH0HbDDTcIXPXq1Utg7dxzz5X7XnnlFdO2bVu5xpZ+/frJz6655hpz2mmnmV133VX+jbWsRo0apkePHubpp5+Wv4EdgA0IxCoIOGH1mjx5snn00UcFogCwTp06CVx+9dVX5pZbbjGDBw82EydOlHttGTp0qDnooINM165dBZgANP6PVZA/r7/+usAnVjFcmjvvvLPp3bu3jBEoxPrYpUsXc9xxx0kbVapUMdddd53A2mOPPWaaNGliatasKfDJz7/++msDpNLHN998U/aooefUqVNFKzQD4Nq0aWOaN28u4Mx4vYqCWzwfDQW3eHTWVopbAQW3ZOZXwS0Z3RNp1YLb888/X94+lqo77rhDLFpY2DbaaCNx7wE3WOJOPvlk06hRI9O0aVPz2WefCZQANEAIBWsZsAfwHHDAAeIedILbgQceKNY54Iz7uB6IO/PMM+XfgBV/169fX/5gFaN9YAngeeqpp6RtCpCF1Ys9eUAhlqv777/fLF26VKxyTnDbaqutzKmnnio/euedd8QSNmTIkPK6AMhddtlFxtytWzfz3nvvCZQxbvpw0UUXievXWvmoY8cddzQbbLCBWCapb8aMGWa//fYzgCKWuoceesg0a9ZMoIwNu4sWLTIbbrihACv1UNCNa+fNm2fOO+88+eMuusct9x8PBbfca6wtFL8CCm7JzLGCWzK6J9Kqn6vUdua1114zm266qUAWUIJ7EVgBOnbYYQc5DIA1ip/Z/V877bSTAIofuAFHWLywSAFoWOAsuOF6xS350UcfmdmzZ5vOnTtXADcsbQMHDix37W677bbimgS27B63e++9V1y0zr1kgBTAhMWNMnPmTDN+/Hix1gGXFFy0ABVuVvoDrDEWrsMiCbhh/eNeAHDBggWmevXqAm6AIy/+xYsXS31Y/bCwOd3QACXWzLp164qmjNEWANJvT6Fa3OL5aCi4xaOztlLcCii4JTO/Cm7J6J5IqxbcWrRoUaF9rGRYkNijNXfuXFOvXj1xlWJdGzZsmFiicPsBNLgLASPco7hKb731VnFDAm64WH/99dfyurGKpQK3xx9/3Jx00knm0EMPFavXOeecIxYpXLNY/vgZlq7u3buLhQqwPOusswQo04Eb7kzqpU8AFS5KXJm4X+fPny/AyLj23Xdf8/7774sbFkgESh988EEBQQ4pPPPMM2KNwy3KPkBcwgAd/brpppvEWrnJJpuIBZGf4XIF0nCRArxYKekr48KiOHr0aLl/9erVnmtAwS2ej4aCWzw6ayvFrYCCWzLzq+CWjO6JtAq4sWfLXYAvYAlLEHCC6xN3KpYi4IWwFrhU2cCPdYoN/kAbZdy4cbK/jL1jf/75pwCfLanADSscdQFG3LdmzRpx1wI9ACP77oBGDhiccMIJcliB3wF3zlOlWNzom9tViosWlyWHE9gTx9joNxY/6sJ6CFBhMeNU7W+//SbXsmft448/FrADxLCuYU1jz92qVavE7YvlkLrY/0ddaMf4cb/iDmYsHEQAFIFBXKgc8KB+wO+KK67wnX8Ft3g+Ggpu8eisrRS3AgpuycyvglsyuhdMq9ttt51Y0XBRbrnllgJJABGnMTnxafe0ATa4JznlGaQAaxwOwBVJHeyrW7lypVixsEoRXoR6OQlKHzIpWARxaWJpow5nn6gLt+8nn3xSfkAAQMVtyilQe2iAvgBa1jIGtH7//fcVmm/cuLFY75z7+tCFe92uUNywwGG6sCsKbpnMcPhrFNzCa6g1qAIKbsmsAQW3ZHTXVnOogAU39tUVWlFwi2fGFNzi0VlbKW4FFNySmV8Ft2R011ZzqADWOkohZYOwcii45XBhOKouNXCr02p3sSjHXdhrqu3mXvWkdO528KmaOSH307tWCwpuCYiuTaoCfgoouMWzNkoN3PY5sY6ZVZYAuO3TRNuNYUk3CarzqvXNmmX/fsENU5L6HIXpczHcq+AW4ywSQoM9Xc7CicWFCxfG2ItwTXHAgTAYX3zxhW9FznGyX40gun4BZ8P1xv9uQoewB43TnZkUrifsBwGEOTDhPB2byf1RXaPgFpWSqetJ6oWTVLvrdIgf2uKZSW0lGwXWLNnMrHlnx2xurXBPUus5dMcLvAIFtxgnkJfyp59+WiEUBIFaAZswZcKECRIiI2w99CFdnk4yHXDilLAWfoVxsvn/P//5j5xMZWM+MdqCAGrY3KeE32CPmzMFl19/OSFKiA5yuBK4l5AgBB4mO0OmJV1+1UzrUXDLVKlw1yX1wkmqXQW3cOul2O5WcCvsGVVwi3H+eCkTBNYNMISL4LQmITCIC8ZpRmKXEWoD6CF0BUFeCdVBOAtn7s7vvvtO6iRkx/HHH1+eciqTPKGE2aBNshQQbwyLEyEsUuXpBDTpH+EtCHtBvDObFcBKyTiJa2aDzhJehIC+Y8eO9czxSUiP3XbbTcKCXHXVVZJGy5n71J0bFUgl9Aax5CjkPCWMB1qRpouTqT/88IOMA3AjOwH9IXQH1jRCdDiLzfwAwFGILcdJVEDY615i0znzlxJOhdAlNr8qOU/dOVEJxMs1jBFrJfHevIqCWzwfyKQAKql2FdziWVeF0oqCW6HMlHc/FdxinD9eyuS1JMaXLYAB0EJCdRK2E6eMGGfks8RKRcBZXI+4KIkfhiXKmbuT7APk2CRWGfdbl2S6PKFAG3k1CUJLrk/ilhFQlsTpqfJ0AktAGwF7+/TpI+E6bGopJ7gBSEAlAGateIzNneMTVyZxzYipBswQGoSk7Tb3KSE03LlRyXLAz2677TZJ/g7ooQ3x5Aj3ASABdkAScdUAS/KSVq1aVbIpENiXubAFqAOUCf3x4YcfGiyYFmC97kUfZ/5StNtzzz0r5Fd150Ql3Ml9990n9aOzn/VRwS2eD2RSAJVUuwpu8ayrQmlFwa1QZkrBLfGZ4qUMCDjjihEQF2DAUsTJIIDEBnzFWgTAAQWclMQS5JW7kzRNQBtAZwvglipPKK7B2rVrmw4dOggskUEA9yewlSpP56BBgyTmGuAGiPEiIiitszBOYq4BTrhLsZARyJaxu3N8EoQXcMTihwuWMRAXjToIjkteT3duVOKwAZA2XyqQiNWNa3FZTps2TdphjIyJf/N7CoCHuxcrmbOQHxSYJdcoWmPBw2LndS9WT4LuOvOXWpgjdtyTTz65Vk5U2sOyyHwCu35FwS2ej2lSAJVUuwpu8ayrQmlFwa1QZkrBLfGZ8nOV0jEsboCKtXxhicOSRJYANsqTVgqg88rdCSh4gVuqdFPWegdg4cY88sgjJU+nBTevPJ2ADAnW7R43QIlcn+59ZG5XKePjUAZ5P905PhkzY8PKyGECDj5gYbTgRk5Rd25UrIy4Hq2lDxcybk3q4F5Sc6EfIMbPgVMsaLbQf5LJU3D7kuIKCLXBdkeOHClZIsjg4L4XAARenflL0QVgxAoHhDJH7pyo3AOcA262aJJ5I+sB93vcpdTaVXCLe4Xld3sKbvk9P+l6p67SdApF+HtgBNhxusmwvuy9996SKxMAI18oLxXcaUAVbj2AClDB5eiVuxPXIEnN2SvntLilAjegAesWLliyIwAvWKeI7J8qTycprbIBN/qFZcyd4xN3LTADjNEXxr3ffvsJuJFndPfdd18rNyoWQYLs4jKtUaOG6ANg4SolZhR5RR9++GFJCg/84g4+5phjxNIFGLNf0HnalH9zmAIrJZCMlljdevXq5Xkv+U7d+UuZH2d+VXdOVMbtBjevpaUWtwg/cCmqUnCLR2dtJT8VUHDLz3nJtFcKbpkqFcF1vOzZwO4sJE7HTUnqJF72JF7HOgVIYcVhPxyBZEk3BZiwR8yduxMg2GuvvQT6CGdBSZcnFIvbiy++KKc+Kbg0sRjhukyVpxPriBPcOBjhdpUyTvbm2cMJdryAljvHJ3lBsZhx0IECRHEYwZn7FJBy50YFUnHxoiduVvqB65T9cuiHbljQ6BsHCdCHn5O2ClenMzyJTWzPHjjgDhcv++yAOa97sU6685dyCtXmV8UF7c6JWqtWLQU3j89QqQFUUuNVi1sED/AiqkLBrbAnU8Etj+cPqOKEJDCG9Yi9X8CIV+5OwG7ZsmWBR4NVbtGiRQIsxDLD4pZJns7ADTlucOf4BKiaNm1q5s6dW+6udOc+dedGpTr6S6J3DnzYwn1Yv9iH5yzow8ELwrH4FfKVsv/QraPfve78pe78qu6cqJlopha3TFQKf01SAJVUuwpu4ddMMdWg4FbYs6ngVtjzp70vMgUU3OKZ0KQAKql2FdziWVeF0oqCW6HMlHc/FdwKe/6090WmgIJbPBOaFEAl1a6mvIpnXQVOPRVRtwK3qymvIlI+mWoU3JLRXVtVBTwVUHCLZ2EkBVBJtXt0pzqa7D2GpRUk2fuq5Rua1Ss3jKRXSa2rpNqNRLQCrkTBzWfyCC1B5oIoSqNGjcwee+xRoSqyIXAIoFAKe+w4UcrhCb/CqVD2fVHYM0fojCRyfmY6d/SVPtvCARHSXnFwgf2FLVq0kJOwQQqnW4lZx/xmUxTcslEt+D1JvXCSanffMzRXafBVkts7vpm9rfl27jaRNJLUukqq3UhEK+BKihLcosjdGeULlLhezZs3l1OKtnCKk7AWYUq6vKJB606VH5STr4QdsfHJvOrmlCqFww4cEgBYOZVJAN5MSxQ5PzOdO07hcsqVU7CcqOWUKsBJxooNNthAguxy0ter+K0xsiNw38qVK9e6LZOxZdr3TPXM9LqkHsDabqYzFO46Bbdw+uXibgW3XKhaGnXmJbgBEJwU5MQj4RsIUYEFg7AQ/IxwDQQ0xRrCS5IcnZxMJC4Z4SGcuTsJ60BICMJDEKTWL88lL27q5yQgQWDJe0k0fU473nzzzXKSE/Ai/hmnO4MUwI16CQ7rLLzIgS/CZAALBHVt1aqVhJogXAcFGCIume0f/SIXJrHWCBRr84oSisIWgstyOhStCEZLqA3+/eWXX0rydOogTRapq6iLGGromSo/KGE7yKO6YsUKwwlPtCAmnbPQ1ylTphiyQVBIScVJTbIceOUWJcgv4MTJWeon3Al/bM5PwqA487Iy1zakyHrrrSdjw7rG3Gczd4AbmRGcMErqMNYamQ6Yd9pLtcYuvvhiya9qc5CSvow+EdMtVT5Twr14FQW3IJ+s7K8tNWBUcMt+reTqTgW3XClb/PXmJbjx8uIPAABs8TLF5UYwWl7kuKNIA0V+S+KgEfWe2FlAADG2iAvGPbgicdcRSBUoIRirX55LMgDwhxhivHjr168v4AaMACnUc+2118r/CRIbpABuuN2++uqr8ttI/wRskOCcOgkQi7UGNxspmXr06CEuO0CGQLTEBQNCiY+GBsQcI5+nzStKsF5byLYAfBJQFyAD2IAQ6kIrcowSRgMgAeAINwIcpsoPioYECUYHMgqQ2ol5oB1bGMfy5cslBts222wjY+JnAJ9XblFisU2dOlXiwgE59Is5AkaxdJH1wJmXFSBHM3ceUMJ1ZDN3XuAGEAPNxIPDwgj0p1pjZIMglZXNQYoljtRk3J8qn6mfK1XBLcgnK/trFdyy107vjEYBBbdodCzFWvIW3CwUYNHA6sWL7osvvpAckhTghIcvUfNtInL2pAEOvPABPvYpZZrnErDCgoO1Cxcfljn2P2G9AjqwsuEupB+0G6TYFEhYo2x5//33xWIIcOAyXbp0qVimsB4BjMAD8MgfYBMAY/yAJePCskNcMptX1NkftGEc6EA6K6AM4AJ4sCTRDyxvjBNtCWBLovVU+UGJbwb4kf8ULQBP+kIbTnAj6TuQjQUKaGMeqJsxuHOL0jbjQ2OAB0AmIwLAc8MNN6yVlxU3JvVxHwUrHpZE0lZlM3de4Aagoj2ptgA30nGR+9RvjbEvzpmDFO0BN9alXz5TP/crY1JwC/LJyv5aBbfstdM7o1FAwS0aHUuxlrwFN0Bq1apVYrkgJRIWIuADeKCwLwn4oHTt2lWSg2OpASwALV7wWOUyyXP58ssvCzRxHy9rArsCVbgvcWnx4rYFlx4v6iDFz1VKHcADrjZcw/wbyyGWQrIOzJ49W/oA9GCNA5Juv/12GScb6wlk6wduWCNxl5KjExhBLwtuQDG6AF+4cIFRJ7h55QcltRRtW7cidQFXbnBzukqtRlxHO87comg+duxY0717d3EXb7XVVmLpxOIGuGFpc+dlJf0VVk93HlAANpu58wI3UmUB/3wJsHv60MpvjQFuzlRWgBvrkv775TO14Ka5SjVXaZDnSJhr1VUaRr3c3KvglhtdS6HWvAU3LC+8/HAbYmn64YcfBMSwZuA2xBLH3jcADXcWbkWsS/wM9x8uNTbJUw9QhqsUa5Nfnkv2eVE/L13clLjlsHwBPoAMLk0SrGMZwyUbpAAgvPyBMGfBUkS9uAtx8bInDGsaYzz00EPFNYxrE0sVLlFOpvI7a2nCSmXzijrrBR5SgRv9B0rI4UnbWKvYr5YqP2jv3r2zBjcsZECfO7coVk3GSDYDrHO4akkN5cz5CdjNmzdP3JXMEfPqzgOKWzabuXOCGzBM/ejNWLEqolG6NQZgu8GNORo+fLhvPlNc3n5FLW5BPlnZX6sWt+y10zujUUDBLRodS7GWvAU3XpxY2XC5cfiAwwHs2QJs+DmWkSFDhohlDOsThw+AKqxJztyd229RaAAAIABJREFU7MXKJM8lYAUEslGetjlViGsUSMASRAEOATtniqVMFg174uxhA+f1gCbuSjbps0kekOAAA/vzGDeHCnCdAgfUwT4wLF/0g8MIHNyweUWBTVtSgRv7BrFmYuX6+++/xbqEfhZQaJewH+78oNTpPFXqZXHDIgoI2sMJzrF65RbFhQt0k8aLcQKowJ3N+QmQufOyAp3uPKCEKslm7qy10/aTftBPdHaeok21xrAIY1HEKkyxrtJ69eqlzGeKK9WrKLhl8okKf42CW3gNtYZwCii4hdOvlO/OW3DDHeaVc5JTiuSSZM+VLbgM+RkgY4szd2emeS65F8jDAuRMQm5DW8yZM6fCz3O1cIAYQlOQb5O2gUkbXsKdH9OdVzTTPm233XayF41DDWgFwGEBBJCttl66ZVq/13VeuUW33357adOZQ9Sd85MTpM7DF1zvlQc0l3OXbo356ZIqn6mCm7pKw3yegtyrrtIgasVzrYJbPDoXYyt5C2777befxNTSogqUkgJqcYtnttXiFo/O2oq/AgpuujqyVSAvwQ3LDBvrtagCpaaAgls8M15q4KYpr+JZV5ryKh6dS72VvAS3Up8UHX/pKqDgFs/clxq4ndG5NHOVLl9R1Xy/dKN4FpVR139sQpd4QwpuebIAyGLARn13ISAvGQQKpRCXjgMTfgFm3eNkHx+wEnfZe++95XAG+xYzLUcddZTsNSS0Sq6KgluulK1Yb6mBW88LSjNX6azZW5vpM7aLZ1EpuMWmc6k3pOCWJyuAYL9kNyDJubMQtyys2zhVDtKgw0+Xb9N5otWrbjtOCz8cQOGABadunQdC0vUr7JgIT8LpYcKpZFKIFUe4Ek6eciI3V0XBLVfKKrjFo2x+taLgltv5SOoLUG5Hlf+1K7jlyRwBNIQusWElbLc4UUoKL2LIESaEkBtYtAh06843Suoqshk4c7MSasSZg9TWe+GFF0rYD06OcpqUgLhYlMgTajMtEL/OmTOVILfpcokCbqTYqlOnjpxOxQLHiVVbGCdx7QiYTCHYLoGAia/HaVLCfTjzkAJXBDwmUwShSghWTJBeOyZCgxC6xJnDlSDJXnltbdw2AJHwMbNmzTKAKDoSs4+fMQdo4S4AHlY6G4yXzBcE0CXeHid0AcElS5asldeWtgjmS/+wQg4ePFjm068ouMXzgUzqhZNUu2px03WVCwWSWs+5GEsh1angliezZS1RBMO1hRAnxDQjfRVBhAnWS95SgItYde58o8QTA66cuVm5z+YgBexs6devn8RDI5gvMdwIs8K/CdxLrDjiwrlzpgKBWAVT5RIFLsnJSiw3IGXixInl2S5o246TmHz0n5hyQA2niIkB585DCkwRK41+ERiXMCkEsLVj4n53Dlc0cOe1pV2CKb/55puSaxVNaY9r0Q2AI45c8+bNBZ7d1j/SchHMmTiBZPQAfMkZy4MLkCaPKzHg3HltCVFD4GVyzBKjj354xfSz86LgFs8HMqkXTlLtKrjpusqFAkmt51yMpZDqVHDLk9myQEPCdVuwVJGgnbhuWNjIKIDlB4jAEufONwqQuHOzAntYwYiLh/XKCW6kB8M6B6RxH0ABxJH+in975Uylfb9colic2JMHFGK1wupFUGQAx2lxA+qsFQ6YJGPEjz/+aJ588sm18pDiKgb+yKeKNmTTYBw2ryoA6M7hiiXMndeWTBuAF5Y+oGzSpEmSWYOYcEArVj8K8EYWCbI12ELGCgL80hYZGjg5hp4ETcZtChxjLfTKa0vaKyyZwCD3ch35Tyma8ko3c8f1+FFwi0fppECm1NqNZzbztxUFtzyZGz9Xqe0eKZ/IGgFkkXGBdFHufKPshXPnZgVO/MCNBO1khcB6BKhhgbPgRvonr5ypFty8cokSJBfAtOm2cHGuWLGiwj4yt6vUjg/LGS5Udx5SXK2ADv0kCO7ixYvFcmXB7amnnlorhyvWL3deW6AN66V1RQOVWDTZY4euZKewhYwczn2FuIeBN+CSQl5V3MkNGzYUGCM9Gq5br7y2WBVx7TIHBDxmLBbcvJaeWtzi+UCW2otOwU3XVS4USOpzlIuxFFKdCm55MlsW3AAYZ8G6ZNNhzZ0715BKCVcp1jV3vlFSNblzswIJQAMuVsDBaXFLBW6PP/64Z87Uvn37+uYSxR0IUGYDblgV33nnnbXykJL2C9gCJNGI1F+4VW1eVSxp7hyuuCvdeW3RgfpxtwJpuEixsmHJo79Y2urXry9WNVzCHEKg0C9crOy9w6JGYb8h1k9OpFpw4+deeW0BQ4AW1y1prvi/gtv/r/CkHvyl1q6CWzwP+lJbV0mNN57ZzN9WFNzyZG6AEq8cn7feeqsBlrACsakd1ycuQ6xEXvlGAQx3blb2jdkcpJmAG9YtLG/k6HTnTAUYU+USdZ4qxeKGS9TtKrXw5ZbeKw8p4MS4ATgKwMUBDDsmYNGdw/WHH37wzGvLHj5cwtTF3jlACysfe9MIU8IhDfbOsafOFnKlor/z0EirVq3MddddZ7D2kWsWqyDFK68tOU/ZP0d7hHUhvRgWPCx+anFTV2lcjx8Ft3iUTgpkSq3deGYzf1tRcMvfuUnbM698o+zfcucY5TCBMwdp2or/d4FfztR0uUQzrd/rOq88pPSjadOmBoujtYS5x+TOCUrdXnlt0YZ73SFWOCDx22+/hQ694pXXFshmTyL7+apXry579Ow43BqoqzTM6sn83lJ70Sm4Zb42wlxZausqqfGGmaNiuFfBrRhmUcdQNAoouMUzlUm9cJJqV8FN11UuFEhqPediLIVUp4JbIc2W9rXoFVBwi2eKk3rhJNWuprzSdZULBZJaz7kYSyHVqeBWSLOlfS16Baad18/cdNONsY+zSZN9zKxZ8ace03bjmepNup0gIXDiLkm92LXdeGY6KZ3jGV3+tqLglr9zk/OesXeM0BoEpf3666+lPU6asg/sww8/lFOcHFAIUtLlKg1SV5LXsi+NE75Bxx+2z0/PWj9sFXq/KrCWAuMHtVVwi2FdJAUypdZuDFOZ100ouOX19OS2c5yY5ASlTd9Ea5wAJcQFpy3JzuBOweXVI2f+0nS5SnM7ouhqJx4ep0k5KRpnUXCLU+3SaUvBLZ65LjWASmq88cxm/rai4Ja/c5Pznj322GOS2YAMCTbNUzpwI6QGmQIoBLm1gW7JcTp8+HAJEOyVq5QsAWRk4DqsWIAhYTzIS0pIDUKb2MwOWP34XbVq1ST/J1bBWrVqyc9I9k5cNNJcVa1aVXKEkgMUKyGWQ2K6cXKTfpFrdcKECdImIEb9hDL54IMP5HfOPKwE1HX2p2vXrpIKq3Xr1gJwxx9/vMR0I/AwsfL4uTuvapcuXSRl1rbbbms22GADQyw8Qrx43e83uQpuOV/2JdmAgls8054UyJRau/HMZv62ouCWv3OT054BQKSlItgskAMAAXKpwI1YZ8RRI0UVUEVGAGKaATg2fynx1dy5SglYS7Bb2gC2gCIC0mLtI6QJwOSMa4alj7RaxErjd/SLNFM1a9aUjAX8HNcu95AlgZyqwBspwBgL2RUAO9JbTZ48WXSkTRLUA4pYCN15WAlQTDJ42x9SXhHgmJRgZK0455xzTO3atQXWyO7AON15VYmfRyBfxkU6MVJd+d2v4JbT5a2VuxRQcItnSZQaQCU13nhmM39bUXDL37nJac8ANCxnZDpo3769xDDjQ5gK3LCaYWkCTihYzgAl4qUBWrgVcZW6c5XyM7I/cC0FSxfgRAYEshyQ8N1ZgDysVGQ6wFKG9Qr4ow7SfXEfVq9evXqZkSNHCiRhkcMaBlTRF7IcYEkE3GiLoLeA1dixY+UarzysXGP7Q+BcwI0sB9OmTRN9gFbgjv545VVdsGCBpOVCRyyAXE/8Off9Nk+rV67SbyodkNN518pLUwEFt3jmPSmQKbV245nN/G1FwS1/5yanPQNs/vrrL4nojyuR/Juk0sIS5rfHDcvaDjvsYEicTiGjAVY74MoJbu6UV7hjO3ToIJkKbCH7AzlSgRpcrLbgjpw5c6a0AQjh9gSUSF7PwwlrGuA1ePBgATfgiv+TSgpwA5RswncLbljmyI7AuABA9vZ55WEF3Gx/LLhRN+PDYocFD8siVrcbb7xxrbyqQJ9NI8bhBix19MF9P6BJoGSvoq7SnC77kq1cwS2eqS81gEpqvPHMZv62ouCWv3OTs56xD2zMmDFiqbL7ynD7ES6AzAV+4AYU4e7E6lajRg3JvQmQ9enTR3J34p50p7wiyfyIESPE3XnMMccILAJNgBd7wtzgZi157733nrhFgUPSUOHSxQJHuqw77rhD4Ix6MwG3RYsWSd/YVwd40uZJJ51kDj30UAFV3KBYEanPDW6k/kIrXJ7AFha/zp07i+UNC+P8+fNlPIwP+HWDG+2476fOTz/9VMEtZytcK3YroOAWz5pICmRKrd14ZjN/W1Fwy9+5yVnPAB/2i7EnzBZcjw0bNjRTpkyRDf6DBg3yPFWK5WrnnXcWKx0J2jt16iQgY/OX8rfT4mZzlbJXba+99pIN/t9//71p2/bf8ARTp04VYHIW9obhPsW6tWrVKoEm3KtYuipXrixt40rFfZkJuAF55CflXpLM02+vPKxoYvvjtLhxLSm1sE5a66FXXlUsiF4WN6/7/SZXLW45W/YlXbGCWzzTX2oAldR445nN/G1FwS1/5yZve8apSSDIxn6jo878pX4dJ8E6hyL8rE3u+7zyj7KHDCuXtRSmE4k9bqNHjzZz5swxWN6w+FH88rD61Ue7y5YtE+i0xSuvapD7va5VcEs3o/r7bBRQcMtGteD3JAUypdZu8JkprjsU3IprPnU0LgUsuOHKLISi4FYIs1R4fVRwi2fOSg2gkhpvPLOZv60ouOXv3GjPIlCA052//PJLxha6CJoMVYWmvAolX8Y3l1qqLU15lfHSCHVhUiBTau2GmqQiuFnBrQgmUYdQPApw+GP69OnFMyAdiSqgChS1AknkwC1qQTMYnIJbBiLpJapAXAoQfJhgw1pUAVVAFch3BWbNmiUn9rXEq4CCW7x6a2uqQEoFysrKJPZb3EXbjUdx1Vl1zoUCpbaucqFhIdWp4FZIs6V9LXoFSu0BrOONZ0mrzqpzLhRIal3lYiyFVKeCWyHNlva16BVI6kGo7caztFRn1TkXCpTausqFhoVUp4JbIc2W9rXoFSi1B7CON54lrTqrzrlQIKl1lYuxFFKdCm6FNFva16JXgMTzBAyOu2i78SiuOqvOuVCg1NZVLjQspDoV3ApptrSvqoAqoAqoAqqAKlDSCii4lfT06+BVAVVAFVAFVAFVoJAUUHArpNnSvqoCxhhyvpI31VnIAfvzzz+bf/75J7RGXvWHrjSDCrbffnvzww8/mN9//z2Dq6O9ZNdddzULFiyIttI0tTFeZ77fXDZOBpHly5fnsgnPuuMco7sDSc1pEms4V5/ZzTff3Pz0008VnitRPmtiX5BF0qCCW5FMpA6j+BUgvtuxxx5rjjrqKHPAAQfIgGvXrm3uv/9+ebCS9P6FF14wN9xwQ1ZitG7d2vTo0UPAaaONNjJDhw6V+nJdGjdubO644w5JTcYYHn/8cXPnnXfmutny+tkfxJ+mTZtGAr7pOn7KKaeIzitWrDDrr7++efLJJ81dd92V7rasfn/00Ueba665xvz6669m4403lrURx5zGOUYvYeKe06TWcK4+szvuuKM56KCDZJ2efPLJ5osvvoj0WZPVYtabyhVQcNPFoAoUiAIjRowwdevWNTVq1CgHN34GZJ199tlmp512MhMnTjSHH364WN+CFgDik08+MVdeeaUZPny4WPY6duwYtJrA1z/99NPmjTfeMEOGDDFYaJo3b24eeuihwPVkc8Mee+whMLPNNtvEBm5Tpkwx9913n3nwwQfNSSedZPr06WP233//bLqf9p7XX3/djBs3TuC+Z8+epl27duawww5Le1/YC+Ico7uvScxpUms4V59Z1kqzZs1MvXr1ZI0CblE+a8Kur1K/X8Gt1FeAjr+gFOClC2hYi9sTTzxhZsyYYW699VazzjrrmHfffdd06dLFzJ49O/C4sJIAEd99952AzPXXX2+eeeaZwPUEvQFowyJUs2ZN8+OPP5r+/fubt99+O2g1ga8HeJ999lnTvXt3gZu4LG64LXE//fXXX2bChAlidSNJeC7KzJkzBernzZtnWrZsKWuHcea6xDlG51iSmtOk1nCuP7OE+7DgFuWzJtfrr9jrV3Ar9hnW8RWUArizOnfuvFafAYtVq1aJtcQJbri9Xn75ZTNs2DC5x4LbnDlzfMd92mmnmWrVqlX4/WuvvWb69u1rdtttNzN37lyD1QIgvOiiiyLTz6/d8ePHy/6ryy+/3FxyySViQTz++ONz3i6AOH/+fDNgwADDCypqcPMbL3vpcEXhDt50001N165dRfNclPfee8/wcl+4cKHZa6+9zN133y2WlDhKXGN0jgVLbS7n1E83Pne5XMN+7RI6KJefWSe4ZfOsiWOdlWIbCm6lOOs65rxVYOuttzb33nvvWv0766yz5ECCG9x4EWOtArrWXXddgS1ezKkOKWDlqV69eoU2qKdfv35m4MCBsueKlz2WKGvZi0Iwv3avuOIKM2bMGHEf7r333gIXUVqFvNoFhIHE3377TYZWtWpV+Tf7B1evXh3FcMWa5qXzokWLZLzA9cUXXyzzl6sybdo0mdOXXnrJtGrVyvTu3Vssb7kuwERcY7RjYdP8K6+8ktM59dMNC3Eu13AqYMzlZ9YJbtk8a3K9zkq1fgW3Up15HXdBKuAGt27duombjZ/zUsZSdfDBB2c1tldffVWsd7hdL7vsMtOiRQt52ee64ILBdYhLD4hjv1ebNm1y2ixuZQ572DJy5EjZiI1rMYqTuak6/9xzz8kJ1iitmX7tYc1EW9YGsAqcnnPOOTnVlsrjHKMdTJJzmsQaZty5/sw6wS3KZ03OF2CRN6DgVuQTrMMrLgXc4LbJJpsYNkbjXqxUqZK4/di3lU3p1KmTvOABlzVr1gjAYX3LdcGFN3jwYDn1+Mcff4grePLkyblutkL9uXCVprKSABm2oHeUFkZnu9aCyc/+/vtv06FDB/PNN9/kXFtch3GN0W8wcc5pUms4159ZNGTNLF682ET5rMn5AizyBhTcinyCdXiloUCjRo3M559/HjoGGi/bBg0ayGb2uAuhTb766qu4my369jj8QEyzjz/+OOfWxKIXM80Ak1jDcX9mo3rWlPpaCTN+Bbcw6um9qoAqoAqoAqqAKqAKxKiAgluMYmtTqoAqoAqoAqqAKqAKhFFAwS2MenqvKqAKqAKqgCqgCqgCMSqg4Baj2NqUKqAKqAKqgCqgCqgCYRRQcAujnt6rCqgCqoAqoAqoAqpAjAoouMUotjalChS6AuQvJdDvt99+m3dDIVwBhWT1WvJLgSpVqhj+EFOuWAufDULakNcz08L1hPh55513JMC2u2y33XbyozhCuGTaZ70ueQUU3JKfA+2BKpD3ChCsliTwxIujkGeTRPRxJYPPRCCi1xPLLlcJ2+nD/7V3b6E2fW0cx2dJSokcEiXnSEhJKRQhhCIXlITCBUVKyIVDDuHCoUhEyIVzScmFQg5XclZSLohCQnKFm7fP8zY0W+9eay/e/X/fve0x6p+/veaac4zfGNvznb9nzPkIzkr/KKukpFVTtdu3bxdt2rRp0koV+rZs2bIoX7ZixYqaXXV97+7705c3N6bDyZMnC6+RSHUvqx1fqx8qBAC/Xbt2NXa5Bj9vCo31Ydy4cfHaHWXi9MV7+LySQ5UKpcW8SLrepnrH3Llzi9mzZxdv3rz5j69t3LixmDFjRlS7+Juht1698nH/ViCDW14JWYGsQE0FvAfs5s2bUQyd0wbavK9KU2fTSzqbQ9u9e3fAjzJS/1RL4PbixYti/vz5TXaZO3fuRPBvyhJjOnfhwoWiT58+jdZhBb1e0AtK/okGIAG1ufn8+XPVS9Tqh6oWqV7vn/Txv9VYGTjVLujkP78P1ty5c+fixdVAXvUNZefqbdevX49qFtOnT2/wK127do1yZc7p3LllBTK45TWQFcgKNKoAB2H58uURcFOdS39fsGBBcfXq1ajWsHXr1mLy5MmRRlXrc8eOHRFwuHIjRoyI9JEX+3769KkQrFIR+T179gRcqJQggIEXwUqQ3rJlS/Hly5c41/jx4+PcCnkLkhwPtUBTwXbXGjNmTICbIKhUl1qr3bp1i9Tp4cOHI8BKp6q5OGDAgHDnPn78GMfp3+XLl+NnGkCTnjL2stNRCW4cpIULF8Z1nEtZINroCzdG6ScaeLO+z9RE1f+BAwfGeI1NmbFqUFGvfvphHNKRap8aIx0XLVoUmqa5Kx/34cOHqI5x69atAjA5jt6+R2elscopcefkmhnvtGnTinnz5hVnz54tzpw5ExU2vLTZWli7dm3Uy+Xg0XTnzp1xndGjR4e79P79+0KJMZr8+PEjXrrsRkCZsytXrjTYD84T19c5Hz58GE7i77ayxkOGDCn27dtXqG/qnPpufqwr7iQot5aePXsWLy92rPWtD8bw8+fPQpkrdWeVnKMh/caOHVv3mh8+fHjU56WF8w4bNizWrvWqlJW5cG6VUNQwbqwG8e/qkY9vuQpkx63lzl3ueVbgf6IAuJJCA2kK0Vc2LhHAACIC4MiRI8OV47CcP3++6Nu3b4BDu3btItUqUAKl/v37x3e4PEBNoEwF3tu3bx+lvARHabavX78WyigJni9fviwE8pSmTWlbwOQcoOnu3bvRTVA4Z86cADK1Vzds2BCpp3fv3kWAHDRoUFScEIzBCxclOUKdO3cOMAEdqZXBjQMCOPVZgFe2Sj+llAHMkydPAvzsX5JOo9ONGzdCA+MdNWpU9BcMC84NOW716Gdc+q4J+JMmTQqNAdT27dsLWtIRQHFuXBPwgkf9dXwaO4jSvy5dugSMlOuprl+/PsofHTlyJPZlgV/gcvHixWLNmjUBxubauO7fvx/nB8r6MGvWrF+pUvC3adOm0AQc9+rVK/oOti9duhRzUNmPBw8eFKtWrYp1dfDgwUIN1tTUYHWtyuZmQlo7tTK40btHjx4xb/40XusJ4Et5aq9fvw6gtHYOHToU46QlmDVe4Eoja5EjrY+Asp45s+aNg1ZgD/y6lvUN2Kx17uKECROifq817NyukVtWIINbXgNZgaxATQU4AqBEYANxlU1QE4AEeYHswIED4a6ApKVLl0Yw56Zw3gANSFi5cmW4TIK0cwMZ6SfOGoDxGaeLMycFxZ3hOgiyQGnJkiXRH5AFxDg3vgNKACNAadu2bQQ/+9EESSAFnGwIT+4F969Dhw4BdaDH9302dOjQ4sSJE8Xz58/DWWwI3O7duxd941wBQY6J70+dOjVADDzZw7R///5wGY8dOxYuof4ncAJJ9AV4tcCtln60BkYAsFOnTgFUGi2NIaVK9Yv+XDB/AiWfHT16NMCDo+lc5gmcJfBMYwfaQPbx48cBvKkWqVQ5KDVXPqeB74Lcnj17xv/7WdrjBkTMJ0fPucAvSE7gVq0f1VKlwIn2lQ1Ag+ZKcLNeXNO8cbjApfnhBgNCgAj4gW66abHGzZ8mPQpcrTnzz73bvHlzQKK+JHBrbM78HrjRoQOwrgZu1p8+gTs1fXPLCmRwy2sgK5AVqKlAche4ICnFyW3gAAgm3ANp0BSIpQgnTpwYAU6qB7hxEgRrP7Ope926dYXN4sANKCVwAwCAwN+/ffsWgVNQlMIShAEWly6BWxkuyuAGvICkfgE4EKUPgmuCM4OWYpUa9Jk0XdrnxcE5ffp0TXDjjAAd0FZ+IhCs6bOxcAeBFEcPEIEW4GYTe2ocI65ULXCrpd/MmTMjJatxZIAEZ6gS3OxHBIinTp0KmARS3bt3D2ijXRo7zRzDjZJqLTfHaVKy4IYDl1xS4OhzTtrTp09/fQ3Y0SmBGycM+KUHFdIcJHCr1o9q4AaepDMrGwgDyJXgZq+avqf1bA1yaKWY/Xzx4sUBvG5AwBtHErgdP348PgdY5txNibQvEDTfYNDNSgK3WnMGKjmiaU9c2otIA2vJTYH+cNzSVoUMbvkf6qRABre8FrICWYGaCtgHZB8WsJA+kioCW9qUKVMi1SP1JZg5Lu0P8nMOU73gBqgEP64aB+XatWvF9+/fIzBKUwEE7kPZcePYAEYtgZsn9Dhe9ml5ChBIOSfXAmgAzfT6Ba5ICr7ljfH1gJu+CrzGDXTAJGjhbIEUKV4t7S/zagfpX/ujOCeg1s8EZlDwp+C2evXqSFVL+Ur1AWfpPa6QPVRcNfsFuZAgiVPEraQlSAKY5bHXAjf73NJeM2CY0oogXD84rqCRiwieObHGBrgTuHFY3Qzoh8L3gFarB9z0V0r80aNHv9YsB5GzV9nSgwOV4Ga83NWOHTvGugJ99puB+r1798a6A/ogWN+sS+DGqaWZ+QO7jgF/5p4OIN0NTT3gJkXshiE9xZtcavv3XMf6c+MC3KTq/Wl9lced/9lqvQpkcGu9c59HnhWoWwEQsG3btnDINEFLMAMGgEOKDKClxu2wodu+p379+jXouAny9r0lxy09GOB8KQ0FhFL6TcB0jM9tJAcpleCW4EewAwv+njafc49ADIcLXGhAgPMh8JbhJaUFq6VK0+tAABM3KV1H4E9776TfpOFcz3Ea14tDxLHS0j6yaq+qqFe/lG40Vo6k8XF/AAmY0ABXSnunOeQogZfy2LlhgLshx427B9ITjKYxelqUhmBdulDqU5OOBKjWSgK3t2/fhsMIWuivWVeAF4Qlx62yH3TkJgJyx6YGdNK6LC9oexjtiUutrDFI5JKldK9j6cTt4iSnOZJql0qnmwcyymvceaXzOXJS8BrAqmfOwGBKWfsewJXOT2ufSwzc/N4Bwd69e8eWgtyyAhTpuSRJAAABoElEQVTI4JbXQVYgK1C3AtwJQdmdv/RludnTBIwE/JQ+q/fE0mBSQxw80CSVCNo0wWzw4MGxVy39rJ7z+h7Xh6tTDuC+y9EAN1J6v3POhq5rT1a6DuCspzkewDTlu7nMzatXrwJ6y42zBJBTOtc8AQHQW29/6xlT+RjrAGg39OJYDqpUugcRvLsMkAGV5OLWuhZIo3dT6OY8nF0PaKSX5tLQTYF1AWi5hVKjXFFp+WotpVSlYcvp2d/RzXp1LS5eWpPGC465ec6dW1Ygg1teA1mBrECzUCCBm4Ce29+tADC3Xys5rEZb+QTr/0sB8CQNmhxZ/Si/BqdavzxkYu8mh9YeuaZq3E2ga4/k71RkaKrr5/M0TwWy49Y85yX3KivQqhSwUV26yh6s3P5+BbiANu9z/zy84PUXzaVx4qRxuaLcM2noelxZQOphj/RqlqYYjz12rl3L7WuK6+RztCwFMri1rPnKvc0KZAWyAlmBrEBWoBUrkMGtFU9+HnpWICuQFcgKZAWyAi1LgQxuLWu+cm+zAlmBrEBWICuQFWjFCvwL24Jnuy1kueEAAAAASUVORK5CYII=", - "text/plain": [ - "\n", - "\n", - "If you see this message, it means the renderer has not been properly enabled\n", - "for the frontend that you are using. For more information, see\n", - "https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting\n" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.match_weights_chart()" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "8520c766-1873-41ea-bbfa-fd00adafba9b", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.vegalite.v5+json": { - "$schema": "https://vega.github.io/schema/vega-lite/v5.9.3.json", - "config": { - "header": { - "title": null - }, - "title": { - "anchor": "middle", - "offset": 10 - }, - "view": { - "continuousHeight": 300, - "continuousWidth": 300, - "discreteHeight": 300, - "discreteWidth": 400 - } - }, - "data": { - "name": "data-26e581481633eb3ba26503d0ee6f9b19" - }, - "datasets": { - "data-26e581481633eb3ba26503d0ee6f9b19": [ - { - "bayes_factor": 1077.8881005286016, - "bayes_factor_description": "If comparison level is `exact match` then comparison is 1,077.89 times more likely to be a match", - "comparison_name": "company_name", - "comparison_sort_order": 0, - "comparison_vector_value": 3, - "has_tf_adjustments": true, - "is_null_level": false, - "label_for_charts": "Exact match", - "log2_bayes_factor": 10.073991699123706, - "m_probability": 0.9999938432638393, - "m_probability_description": "Amongst matching record comparisons, 100.00% of records are in the exact match comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "\"company_name_l\" = \"company_name_r\"", - "tf_adjustment_column": "company_name", - "tf_adjustment_weight": 1, - "u_probability": 0.000927734375, - "u_probability_description": "Amongst non-matching record comparisons, 0.09% of records are in the exact match comparison level" - }, - { - "bayes_factor": 5.003662767779489e-07, - "bayes_factor_description": "If comparison level is `jaro_winkler_similarity >= 0.9` then comparison is 1,998,535.97 times less likely to be a match", - "comparison_name": "company_name", - "comparison_sort_order": 0, - "comparison_vector_value": 2, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Jaro_winkler_similarity >= 0.9", - "log2_bayes_factor": -20.93051210485307, - "m_probability": 1.0424297432873944e-09, - "m_probability_description": "Amongst matching record comparisons, 0.00% of records are in the jaro_winkler_similarity >= 0.9 comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "jaro_winkler_similarity(\"company_name_l\", \"company_name_r\") >= 0.9", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.002083333333333335, - "u_probability_description": "Amongst non-matching record comparisons, 0.21% of records are in the jaro_winkler_similarity >= 0.9 comparison level" - }, - { - "bayes_factor": 2.398603557108488e-05, - "bayes_factor_description": "If comparison level is `jaro_winkler_similarity >= 0.6` then comparison is 41,690.92 times less likely to be a match", - "comparison_name": "company_name", - "comparison_sort_order": 0, - "comparison_vector_value": 1, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Jaro_winkler_similarity >= 0.6", - "log2_bayes_factor": -15.347445746758282, - "m_probability": 2.2644199317030986e-06, - "m_probability_description": "Amongst matching record comparisons, 0.00% of records are in the jaro_winkler_similarity >= 0.6 comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "jaro_winkler_similarity(\"company_name_l\", \"company_name_r\") >= 0.6", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.09440576059316999, - "u_probability_description": "Amongst non-matching record comparisons, 9.44% of records are in the jaro_winkler_similarity >= 0.6 comparison level" - }, - { - "bayes_factor": 4.296928613203078e-06, - "bayes_factor_description": "If comparison level is `all other comparisons` then comparison is 232,724.37 times less likely to be a match", - "comparison_name": "company_name", - "comparison_sort_order": 0, - "comparison_vector_value": 0, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "All other comparisons", - "log2_bayes_factor": -17.828262760151212, - "m_probability": 3.891273799259086e-06, - "m_probability_description": "Amongst matching record comparisons, 0.00% of records are in the all other comparisons comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "ELSE", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.90559423940683, - "u_probability_description": "Amongst non-matching record comparisons, 90.56% of records are in the all other comparisons comparison level" - }, - { - "bayes_factor": 6967.79868693931, - "bayes_factor_description": "If comparison level is `exact match postcode` then comparison is 6,967.80 times more likely to be a match", - "comparison_name": "postcode", - "comparison_sort_order": 1, - "comparison_vector_value": 4, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Exact match postcode", - "log2_bayes_factor": 12.766487226975748, - "m_probability": 0.6623540185783227, - "m_probability_description": "Amongst matching record comparisons, 66.24% of records are in the exact match postcode comparison level", - "max_comparison_vector_value": 4, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "lower(\"postcode_l\") = lower(\"postcode_r\")", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 9.50592932341548e-05, - "u_probability_description": "Amongst non-matching record comparisons, 0.01% of records are in the exact match postcode comparison level" - }, - { - "bayes_factor": 459.722336375772, - "bayes_factor_description": "If comparison level is `exact match postcode sector` then comparison is 459.72 times more likely to be a match", - "comparison_name": "postcode", - "comparison_sort_order": 1, - "comparison_vector_value": 3, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Exact match Postcode Sector", - "log2_bayes_factor": 8.844618953374955, - "m_probability": 0.0655513205697529, - "m_probability_description": "Amongst matching record comparisons, 6.56% of records are in the exact match postcode sector comparison level", - "max_comparison_vector_value": 4, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "\n regexp_extract(lower(\"postcode_l\"), '^[A-Za-z]{1,2}[0-9][A-Za-z0-9]? [0-9]')\n = \n regexp_extract(lower(\"postcode_r\"), '^[A-Za-z]{1,2}[0-9][A-Za-z0-9]? [0-9]')\n ", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.0001425889398512322, - "u_probability_description": "Amongst non-matching record comparisons, 0.01% of records are in the exact match postcode sector comparison level" - }, - { - "bayes_factor": 108.80454006003016, - "bayes_factor_description": "If comparison level is `exact match postcode district` then comparison is 108.80 times more likely to be a match", - "comparison_name": "postcode", - "comparison_sort_order": 1, - "comparison_vector_value": 2, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Exact match Postcode District", - "log2_bayes_factor": 6.765594946596759, - "m_probability": 0.05042155305902204, - "m_probability_description": "Amongst matching record comparisons, 5.04% of records are in the exact match postcode district comparison level", - "max_comparison_vector_value": 4, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "\n regexp_extract(lower(\"postcode_l\"), '^[A-Za-z]{1,2}[0-9][A-Za-z0-9]?')\n = \n regexp_extract(lower(\"postcode_r\"), '^[A-Za-z]{1,2}[0-9][A-Za-z0-9]?')\n ", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.0004634140545165047, - "u_probability_description": "Amongst non-matching record comparisons, 0.05% of records are in the exact match postcode district comparison level" - }, - { - "bayes_factor": 17.98989670573241, - "bayes_factor_description": "If comparison level is `exact match postcode area` then comparison is 17.99 times more likely to be a match", - "comparison_name": "postcode", - "comparison_sort_order": 1, - "comparison_vector_value": 1, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Exact match Postcode Area", - "log2_bayes_factor": 4.169114997843882, - "m_probability": 0.22167260253148252, - "m_probability_description": "Amongst matching record comparisons, 22.17% of records are in the exact match postcode area comparison level", - "max_comparison_vector_value": 4, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "\n regexp_extract(lower(\"postcode_l\"), '^[A-Za-z]{1,2}')\n = \n regexp_extract(lower(\"postcode_r\"), '^[A-Za-z]{1,2}')\n ", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.012322060885477316, - "u_probability_description": "Amongst non-matching record comparisons, 1.23% of records are in the exact match postcode area comparison level" - }, - { - "bayes_factor": 5.119283255544672e-07, - "bayes_factor_description": "If comparison level is `all other comparisons` then comparison is 1,953,398.45 times less likely to be a match", - "comparison_name": "postcode", - "comparison_sort_order": 1, - "comparison_vector_value": 0, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "All other comparisons", - "log2_bayes_factor": -20.89755482977835, - "m_probability": 5.052614199149832e-07, - "m_probability_description": "Amongst matching record comparisons, 0.00% of records are in the all other comparisons comparison level", - "max_comparison_vector_value": 4, - "probability_two_random_records_match": 2.2824681576908936e-07, - "sql_condition": "ELSE", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.9869768768269208, - "u_probability_description": "Amongst non-matching record comparisons, 98.70% of records are in the all other comparisons comparison level" - } - ] - }, - "hconcat": [ - { - "encoding": { - "color": { - "value": "green" - }, - "row": { - "field": "comparison_name", - "header": { - "labelAlign": "left", - "labelAnchor": "middle", - "labelAngle": 0 - }, - "sort": { - "field": "comparison_sort_order" - }, - "type": "nominal" - }, - "tooltip": [ - { - "field": "m_probability_description", - "title": "m probability description", - "type": "nominal" - }, - { - "field": "comparison_name", - "title": "Comparison column name", - "type": "nominal" - }, - { - "field": "label_for_charts", - "title": "Label", - "type": "ordinal" - }, - { - "field": "sql_condition", - "title": "SQL condition", - "type": "nominal" - }, - { - "field": "m_probability", - "format": ".4p", - "title": "m probability", - "type": "quantitative" - }, - { - "field": "u_probability", - "format": ".4p", - "title": "u probability", - "type": "quantitative" - }, - { - "field": "bayes_factor", - "format": ",.4f", - "title": "Bayes factor = m/u", - "type": "quantitative" - }, - { - "field": "log2_bayes_factor", - "format": ",.4f", - "title": "Match weight = log2(m/u)", - "type": "quantitative" - } - ], - "x": { - "axis": { - "title": "Proportion of record comparisons" - }, - "field": "m_probability", - "type": "quantitative" - }, - "y": { - "axis": { - "title": null - }, - "field": "label_for_charts", - "sort": { - "field": "comparison_vector_value", - "order": "descending" - }, - "type": "nominal" - } - }, - "height": { - "step": 12 - }, - "mark": "bar", - "resolve": { - "scale": { - "y": "independent" - } - }, - "title": { - "fontSize": 12, - "fontWeight": "bold", - "text": "Amongst matching record comparisons:" - }, - "transform": [ - { - "filter": "(datum.bayes_factor != 'no-op filter due to vega lite issue 4680')" - } - ], - "width": 150 - }, - { - "encoding": { - "color": { - "value": "red" - }, - "row": { - "field": "comparison_name", - "header": { - "labels": false - }, - "sort": { - "field": "comparison_sort_order" - }, - "type": "nominal" - }, - "tooltip": [ - { - "field": "u_probability_description", - "title": "u probability description", - "type": "nominal" - }, - { - "field": "comparison_name", - "title": "Comparison column name", - "type": "nominal" - }, - { - "field": "label_for_charts", - "title": "Label", - "type": "ordinal" - }, - { - "field": "sql_condition", - "title": "SQL condition", - "type": "nominal" - }, - { - "field": "m_probability", - "format": ".4p", - "title": "m probability", - "type": "quantitative" - }, - { - "field": "u_probability", - "format": ".4p", - "title": "u probability", - "type": "quantitative" - }, - { - "field": "bayes_factor", - "format": ",.4f", - "title": "Bayes factor = m/u", - "type": "quantitative" - }, - { - "field": "log2_bayes_factor", - "format": ",.4f", - "title": "Match weight = log2(m/u)", - "type": "quantitative" - } - ], - "x": { - "axis": { - "title": "Proportion of record comparisons" - }, - "field": "u_probability", - "type": "quantitative" - }, - "y": { - "axis": { - "title": null - }, - "field": "label_for_charts", - "sort": { - "field": "comparison_vector_value", - "order": "descending" - }, - "type": "nominal" - } - }, - "height": { - "step": 12 - }, - "mark": "bar", - "resolve": { - "scale": { - "y": "independent" - } - }, - "title": { - "fontSize": 12, - "fontWeight": "bold", - "text": "Amongst non-matching record comparisons:" - }, - "transform": [ - { - "filter": "(datum.bayes_factor != 'no-op filter2 due to vega lite issue 4680')" - } - ], - "width": 150 - } - ], - "title": { - "subtitle": "(m and u probabilities)", - "text": "Proportion of record comparisons in each comparison level by match status" - } - }, - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtQAAADqCAYAAACCwtIEAAAAAXNSR0IArs4c6QAAIABJREFUeF7s3Qm4tVP5P/D1JiEJmYuQeSgZQiFTJVIypDIrQ4VMISFD5iEyFokylDEaDEXIUGSeSmRKMitjkvyvz+q/3t/zbnvv8+yz99lnutd1neucs/fzrOG71nOv7/o+97rXhCWXXPL1FCkQCAQCgUAgEAgEAoFAIBAIBAaFwIQg1IPCLW4KBAKBQCAQCAQCgUAgEAgEMgJBqGMgBAKBQCAQCAQCgUAgEAgEAl0gEIS6C/Di1kAgEAgEAoFAIBAIBAKBQCAIdYyBQCAQCAQCgUAgEAgEAoFAoAsEglB3Ad5ov/VNb3rTJE3473//OyqapN4zzjhjevXVV9Ozzz47LHVWh9lmmy3985//TC+88MKw1CEKTWnmmWfOMDzxxBMjBo63vvWt6W1ve1t68cUX80+k/iEwnLah2NOhtqPKGeoy+tdjUVIgMHYQCEI9dvqy45b89re/TSb/kl5//fX06KOPpnPPPTeddtppHec3VDfMNddcabPNNksPPPBA+uEPf5hWWWWVdOihh2aysuKKKw5VsS3z/fjHP56+9a1vpQkTJqQLL7ww/z2S03rrrZe+/vWvp6eeeiqp+1hKN954Y27Ohz/84fTSSy+NiKYddNBB6aMf/WhSty996Usjok4XXXRRXnzsueee6ZJLLhkRdRqKSgynbfjlL3+ZZplllnTwwQdnGzoUia1ZffXV0/XXX5+22WabSYrYaKON0g477JAefvjhtM466wxF8YPKU594Pq+88sr80y595CMfyfj1wlY1zhsDVb6Teg6UV3w/PhEIQj0++z23uhDq5557Lv3jH/9I008/fZpmmmnydyNp4l1jjTXSfvvtN9HILrLIIvn/v/71r3kC6Xc644wz0gILLJAXH4cffnjGcSSn9ddfP+26667p6aefTqutttpIrmrHdfvxj3+c3vKWt6TPf/7z6d///nfH9w/FDRtssEFad911069//ev03e9+dyiK6DjPSy+9NM0wwwxpn332Sb/4xS86vn+03DCctqEQaov9s88+e0gg23///fOi+IYbbkhf+cpXJilj4403Tttvv322i2uvvfaQlD+YTI844ogsfFx++eVpt912a5vFxz72sXTggQemZ555Jvm7m9Q4bwyUVyf1HCiv+H58IhCEenz2+ySE+phjjsnKr/SrX/0qveMd70i/+c1vMgFjlK666qq01FJLJcT7c5/7XPrQhz6UJ2YEnKpNTUDY7rzzzvTNb34zrbrqqukPf/hDWnjhhfMk/vjjj6edd9453XvvvbmMrbbaKlFTpppqqvTaa6/lz7fbbrtM6hncaplIkvpQ0pXl2m9/+9uJ8UNoEamB8jTJfPrTn0633XZbmn/++XN+f/vb37KyfOutt75hBLzzne9M3/nOd9K73/3u5PUq5fO4447Lk+TRRx+dll122Ymfn3766enEE0+cmIeJA9m/6667Mj6zzz57WmGFFdISSyyRTIZcVf7zn//k7ylM2ocQIuYwVp4J8cwzz0w//elP83cm6KWXXjr//a9//Sv3Dfy99lX+HHPMkUzmn/jEJxIl8oQTTkjHHntsbquy7r///twXrQi1+2Hqt+v//Oc/5//1Z7vyNdokCZ8HH3wwLb744umVV17JZavLQgstlPM77LDD0s9+9rNM5r/xjW/k/N/+9renOeecM9+r7eo99dRT53Lf+973pskmmyxPqkipz1rhqvw3v/nNWRFWFoz1D5eLJ598Mv3kJz+Z+Lal3biF7yGHHJIeeuihNOWUUybqlvHouUDa9Uu7vKuDyFj2VuCyyy7LY8xv41yfq5t6lj5sHHylHIoe7D1z8EOIJc/ZmmuumevI1Uieni3jqF0/FkJ98803p0UXXTTjq88o6M3cpuC9xx57pOmmmy6Xc/vtt+dFtr/bPR/q38mYGMhedDomdtxxx0lsw0D91m5MdGo3Ggm1hdWWW26Zx7Xn1kLGs8w+wJc9+upXv5r71bPOLh1wwAFJHx111FFp3nnnzd+xn8YU5bkOofbcvPzyy2nWWWfNb/E8P96WnHXWWTm/z372s+mxxx7LtlP/e/PnDWA1eaPleb3jjjuyeDDttNOm++67L9ulr33ta7lN/tc+zzCS73PPNRugrnvttVdaeeWV0xZbbJEmn3zy7KLn7ci+++6bFwPmEuPYc2qOgU0h1OrNXrGf8vve976XbV1j8pyq6/ve9778xtCcAEPPGDtUnTf0Ryf1NN+Yy7yFhKH5wJtbz+QnP/nJjmzCOKYZ46rpQajHVXdP2tiiUBdCzTghD4wfAoT8mXBKYiQpwuedd142XiZxExZCg9wxhl7XIYZSIYv+ZnRXWmmlbER32mmn/D3DRBGXFx9YRBphrZbJ8CMWJhtlIMBIUtXlg6Fsl6drvc6TGFpkQpnIeSHkpY3ac8UVV+QJQ3kmwuIWg8yYILSzEL5TTz01TzIlUYYQkZIQKUbcROKe559/Pi8kYIboUo8RcoRbeSYP35eJT5lLLrlk/p+/tolNsvBBTpGX8pnPzz///Dy5lMm42gfNCLX2eg2rja5VRz/avfzyy+eJrFX5yNDvf//7XB+Tj5+qX772+B8GH/zgByfpe5/5Tj+4z7iwyDJR6SPjAXGTLMCQ70Zcl1lmmUwUJAR0k002yZO3yfjvf/97xkAd4I9gtxu3CDVlrLRF/fQREqDuiEervBGYamp0+Sh1dA1ckQjJuLW4qCZ1ML5gor8RWkn5nkvPqu+0D2GCoT733LXrx0Ko5WUMljdR+m/bbbedpA7swDnnnJP7BpbGhr8tsL7whS+0fT7YlE7GxPHHH9/WXiBlnYyJ3XfffRLb0K7fBhoTnq+6dgOAVUJtTOgTyd8W1zBkX//4xz9mUl2ei7nnnjuTXf1q8a0/uecYe37g77fvLKQHUqiVKW9j35iR73LLLZcXZfqdvUIQlTnPPPNkW68+1URA8HxJypZPSZ5Pz4bkmbLgu+666/JYJK6wnWwYYcA4Qp6NeSQfOWXTy5vF6lhkayxoy3OoDX7Yf8miuXHxR+R4z3vek4m08j0T7Jz8iSLVeUM9Oqkn+2EuMx/ssssueWHh7eRgbMIk4MY/YxaBINRjtmsHbljVh5rRZfBLMnEiEMgtosVoUlEYYuSlkDOG1UTOYDKeJgdGiIH71Kc+lbyCLeq3iZvhZuSKkbLqN4FIrcpsfHXX6CdZVPVWeSJq7jGxUaopdCZyEw4iVU2lLHggzxS5H/3oR1nhZey9yv/5z3+eNyQ2e7VbJdQUFeUw5OqAnFC9EMUygZkcL7744oy976699tpMsJEa91IFJWTShIhc+jHRmPAKoUZAEYA//elP6Zprrsn3FLcdyliZaBpdPgqW8qPGmCz1h35FUIt7QLPyTdKFPPHZdK/JWfryl7+cyaL6lcmQ76eFj+vUQ5thafJ1nzbAmSptErWYQO6pdbArhLrgalxWCfXJJ5+c5ptvvkxESz3cj+hsvfXWbcftLbfckidyY4K6bNKHo0la/1E+W+V9zz33TDKGWhFqyplnSN2MgWZE5ne/+10mLxZqyt97772z8nzTTTfl58szhdhSlymCniWKZllkNutHxB0R8LZInsiG8WesN/O3LfVH2pHZouJ65il07m31fFA/OxkTympnL97//vd3NCYsAquLbQR2sGMCXnXthgFQJdSeB4RMXxm/xpRnnbDAfpZ+1ofqzC651hgsC3Tqteu5DXke2V7Px0CE2vNlUeZeb0IQcph4Y6Uc/brWWmvlfjK+jSn2uhmhNrY33HDDbMPZciSZDSwLISo7ko9wItOeXfPGYostNrGtja4U7N1MM82U30pZlLN73mixfXAphJoAow38xdkKarjntJq0ATbUcqIFO8cuqJ++q7oKeo46qSfc2xHqdmOr0SYMPBvHFWMBgSDUY6EXB9mGQqgZIOoB48VVA6Fi3ItaXN1cVUgzQ43wSMXYFnLDCFXJwtVXX51JE8PKeDKAyB8iLJnkEXLfUwNN4NUyByLUxai2ypMrAuNKnTBJUX8Yc6TgAx/4wCTomeCopVU1t5DYsgmyDqGubphsVN2rBVLgyqbGouyX702eyLyE+CN7hYj5zMSsLhRq2DHwJjLEsto2riWbb755U5cPxPeLX/xi0+8GKh+mxoFU6l4dAz43nkyG+tCGI4S6LLZ8/4Mf/CAr6u6zkQsWVTXMNZQmCxuEunEjapVQGzcm46pKriwLg5NOOikv9lqNW2Pevci4hYVUxq06I/it8uYaUk2tCHXpw1NOOSW7tSAW+r8k9eYb2yxpB7XVeCiqdbkOyUCqW/Wj64pCrQ3aWsZ0tS9KfhYsCy644MRFb7U+Az0fsCuEus6YQHja2QvuAJ2MicbFtjE32DHhDVpduwGjKqG2gKu+OSoYlkV8ESbYPlh7Gwhb91THRBX7Cy64INvJgQh1tU/ZAjZBn3AXKQtkbhcWa9xJkNnGVBTqIlIQVAgMxYaWNylcQow741I7qqksHhoJtTw8T2WRXr2nuHwU7KvzA7cXhLua2PzqBkx2T1td2zhveL46qWcjoebCZkFZFOp2Y6vRJjR9qOPDMYdAEOox16X1G9To8tF4ZyGCxaj63sofMagqW0UlLZEEqooTw8mASsgslc5rR68KEQ+vXcvOb2SbwoUYVcsciFC7tl2e1BgTo/YiR+0IddnAVxRg9S44FMWuDqGuEvJi9E0S5VUn9QrRRKrUn5LKN5YySrXxKpbx59crUX24uxQFvBDmgn2ZnCg0ZdJ0LUWpTIbNXD4sNpDN4uJh0vG/RY8FE5LQqnyLn7rkCQkwWcIfqaDKWciVtwvq7HvjRZtMwgg2da1KqBvbUCXUykBMJAslREJ7LO7g0m7cIjaNm6GqhFq+rfJufF3eilAXgtmKUKs3Qq3OXn9T5rgKKNfz5vkxqetTY8pY4o6DUHsuW/WjBZXnrbopsR2hLuPVoppvrYWVvQ3IF7yNi1bPhzHXyZhA8trZC/71nYyJRkKNbA12TCCvde1GI6HmSkYR9cyyH1NMMUX2wbUgpKQW94HiklWeP65f3lB5vvWbzy2iuVFYnOq3gQh1IXzqVJ4vBJ7yXWwXsuvZUlZ1D0iZA+oSar71xiz7pC3GBheMoq5b9DcSags6WNjrgfCzuUi9Z7C4fFQ3JRbBpZFQmzso7hYh7Kd5o7i6eRvq7U1VoS7uP3Xr6c0COwV3wkOJllTwbTe2Gm1C/Vk5rhzNCAShHs2912XdB0OokSh+nJJJnprI/UEyidggVYgDX0GuC9RpRshGHBMzgmvCsKGK8WXYKeS+Z8QaCXVRLbzK9OrafdXXuhSddnnyZaw7MZq4ykY3KgMSYfKTygTQKaEuqnEhTAw9w2+yNOF4vY+0KI9ftUlV0m7k613veleerLhzeO1q8ihkp5FQu48Ka3OQe/RRmWRabUosihGf8jLxF4WIz2Or8k2idcmTtiACxdfd4kQ7Shxpr3ONKyQGCTAmTGaSz9XHoqEdoaZ2ayuFziLNAgUO/CyR43bjlnrdjlBz+2iVt7FYTd0Qaq4byimvumHgGUM8uPhwV7KJDLE1wVuUee1OJWzXj41RPtoRakSQa4qE1BubFqz+RpjaPR/eVnUyJmDezl4Yy52MiUZCXfAczJgwJuvajUZCzSaKumHxaMOdZxrx8wxz/5JsVi1vG8qeCAtZdtkiwsLJ5mkbWSWuO962DESoXet5KRtI/c+2IvPc6kp0EDbY583CTXZCqLlqIJzygbc3fJ7folATBby58PaHfYeN8WdOMI4tfrWbnVbvxuewFaG28PQdrJBe9kvZPtd33ErkVeYN9RlMPeGE6JvLpEKo242tRpvQ5VQdt48SBIJQj5KOGopqFkJNQSmuBdVyminUvqc+mCyKzzWDQ0X0qri8JmO8EGnXULOQXpMGw+kavpElIZbUB0pbszIZZ5MPw4kk8g1msGxo8RpyoDzLpsSiUFOsqETNXD7Uie8eUlR1PSj3+r4doUbstbWR+JlAENDijlBUEhMClweYlM036mVCQ7SpU34j4SUhB5tuumneoNOMUMOWYlnysygQH7dVbFcTNR/h0p8mDOoYN4J25cO/Lnmyoaj4UFc3P2oTJdpkqkyKaBkz8kc4+A97+9EM1+JSYmHCV9aEXTbcyZvLQNnU1m7cNgvXVVWoCxlolXczQl2UrUY3mOLm0ujyIQ/9jJSWRarPtB8RQMpELigbwixKXFeIS7t+LITa2w9YIhzcEpq5fCizqIqlXXCUvzHX7vlQt07GRPGhbmUvOh0ThVAX22AxO9gx0andaIxDXd3YB0fPn/HtLZTkDUMh1+Vtks+JFsoum5PZAwtrz3SdONTsqWe/2BoLykLwLE6Ma8+YPQ76tFkyzhD58qawYNHo8mExZ2yWDY/yshCwKFNvCrK3bd40qI8FBaWYe1p1jJfNsc2ew0Koqc5lbJU6s4Nlw275TPvYs8Z5w7jtpJ4WHtT0sujxrLCHhVAPNLaGYs6OPEc2AkGoR3b/jNjaMdgmeKt/rzX9lgqh5u/nNSN1l9FtPNmLkUL8GKnGSAfNGl1OJmTMWp2K12me7cBVntfrXpMjlr04DRFmXtGb7O++++5JMFEeJZCKhYg1xlQ2QSGM7isT8kD11z+PPPJIretNtPrDmwJ+kY391Wn5zepWorGUjU0mJH1fPU3QJAgHmA/2NDj3C7dloxLFv5pajdu6D1q7vOvmUec6eHt749mp+mMaJ/rVGwrEqTEN1I91yi7XcDfRRzD0pqOaevV81LEXI31MtMMUhhbMMDTu66aCrzcsFlSdxlh3v7cZnoFGBboIKc18mOvWr9l1ng2Lhmb22XPH7crC1I/E7cPbGEJKtyfe2iBNAGEfq+U3mzc6rWeJNtS4cbNg0C+b0E3fxL39QSAIdX9wHjelVCdI/pGRAoGCQJVQj6SDJ6KHhg+BsBf9w55rhTeAiKeFszB8kQKBQKB3CASh7h2WkVNKOSyR1/t2Q4+k48ujc4YfAa/jHcJgI5OIBpECgbAX/RsDfNW9NeQPzH2kMWJG/2oSJQUCYxOBINRjs1+jVYFAIBAIBAKBQCAQCAQCfUIgCHWfgI5iAoFAIBAIBAKBQCAQCATGJgJBqMdmv0arAoFxg4DNTnxCy2an4Wh4iUAzHGWXMkv0j7JBuE5dbNpyX6eb3urkXfeasmGtceNj3fvjukAgEAgERgICQahHQi9EHQKBQGBQCIiCIASisIQiXwxXEpll9913z+Edhys5xdSBGMLh1U182R09LexgY3Lgh3CLQkGKBy2esmgVQlw6JEf0Cn7xwtIJo1k+r1t2ua6ENxNWUpSGSIFAIBAIjEYEglCPxl6LOgcCgUBGQKxfsZsdKjScaSwSamEUkV2xhxFq4daQbNEiHPgj/q+4yuIVCy1WPh9MP1iMiBO82mqrDeb2uCcQCAQCgWFHIAj1sHdBVCAQCAQGgwB3Baf3iafr0CCHXjjxkGpNWXVyGqLr9DTJ4RjNjll2gIMj7x2kIca30zyd8ik/h084zZFbiVPcqKgPPvhgPmFu7733zvdQxh3hLoJJVaEWg9ix5+655557MvFHRKm58ihJvGl1U1fxdKt1ddCFQyrcTxV2iqiDNOQh/BlF2veUXQq1kz79IMLaUg5vEdf6yCOPzLF/xa9GkPfZZ58cbcURyuIHayvXGQfoUKO1D47wKYTaMdMOyNFWIe+4izj0xSmUPneKn+TwpTXWWCMf5iGuuQNauJV85CMfyfWFqdjujjkXbcIx0k633H777ZOT9yIFAoFAIDDaEAhCPdp6LOobCAQCGYFypHs51vikk07KxNZBEU4RROgcDoNoOn2OilqOcS4QIp6IXCGMCCYiKmZ2yc9Jhwj2hhtumA+cocw6sc2BNE7NREjl00iokUanEiLUDj9Cyn/xi1/kfBDkkhwM4aRSZJbijkS/973vzXGCuWM47tuBRk55Q9i5WDgQx2lz3Cwc5kLdPeuss/KCAjEXGk29/P7MZz6Tr3WEvDjEDghad911cz2cImnBAS91pfQj6hYM6oJQq0PB5y9/+UsuU2jMAw44IJN8BFxbi8uHY9CRfvdYbCDT+gRuVG5uI+edd14m+/yntVdCrBF5OEYKBAKBQGC0IRCEerT1WNQ3EAgEMgLcDainSJ2EADuZs/gDU1MROcSNcuqgoeWXX36SEwaRTMSTwjrXXHPlI9ARb3nIb/7558/kUnJk+EwzzZSVa3F83WcjXTkuuVtCLV8KNlWXm4WYwRJCjYwi9urgdLnSRoo1kgyLoiiX7w488MBMfLliWCBQ7CnZVHDfyV9bEWqLDyfMlaO7keMvfOELLQk1zKsuH1Ufam8LqM/wkRx37th5ixoLEeRdm2wilY9FhsQX3imiFgCRAoFAIBAYbQgEoR5tPRb1DQQCgYzAQQcdlAlyOfENAaaU2kQnUTzPOOOMdOyxx2YF9/DDD8+/q8fIc49ASG2wmzBhQnrttdfSK6+8MpFQzzjjjKmc6ug6BFu+CDqCKHF7QBQHItTcLc4///yWCrVyyvHUlOg//elPWdF1+AlSLP3yl7/MR5FTlyXuLZRgLhaUYW4pfktUaL7JVHkkFUmmCGvjZJNNNpFQVzcl+h6ZR+7XXHPNQRFq+FC5q+n111/Pbdhoo42ya4hFA0WeMq/fJH2lPVTzSIFAIBAIjDYEglCPth6L+gYCgUBGYL/99svEtqpQzzDDDBOVUcTu9NNPz64LrQj1tttum9VdhBSJlSeSXhTqan6FUCOBO+20U/Zn5heMjFOOWxHqoi4XJbuVywcifNttt2WyyTeci4dTJauEmjLOrYPiK5V2UdFPPfXU/FlReLWJQs0lhl/yAw88kOvNpUX+fKop1Outt95EVxin6fGN5ut82GGHDYpQF7eOHXbYIdeHb/acc86Zrr766qyOw1m9vV2wyIA1v27+5vyxy4IghnkgEAgEAqMJgSDUo6m3oq6BQCAwEQEuBTa1VX2oOyXU3B4ouAje7LPPnhVTftftCLVy+fpShqnkfJoXXXTRNxDqQoyp19weTj755EwgWxFq5Bn53nnnnbO6jPzyu64SalEwuK4ccsgh6dxzz82bGbmtIOt8xbmtIKpUdso8gk6lvuKKK7ICbFFgEycV398UfS4f8uGDbfHBrYXrB+W4lQ+1RcyWW26ZfyxAbOosPtTayS+c7zQ1/YQTTsg+4K7hr25jqLy5iVCrkXfuH4g4949DDz00RnkgEAgEAqMOgSDUo67LosKBQCAAgeLugIAiuEhaI6E+7bTTsr9zVcm1mbAkLhxINBcIySY6GwJt0KOsNirUNvRRh5E+pFN66aWXcrQP0UGQ7Gqi9lJ9pUcffTQrwq0INZXWBkPuEdR1ZJN6LN/i8oGkU3IRbdf5oaojogi1z9VFop5vuumm2XcaubZwcD23EL+nnXbaTKKVIV8/PudHbtNg1YWmWRxq/urw5UIC40Ko1eHMM8/M/SNx7bBJEbbF7caihYsNFZ6S3hixJUZ4IBAIBAKjDYEg1KOtx6K+gUAgMBGBs88+O29k68ZNAJlbYoklsnsFEkq1RW4Rz3aJ2szHWgQPBLFVkp9k01+zVKJ82BjoWhsdByrbQgDZv+mmm95wyqEQdPPMM08OV1etFyV7iimmyFE2tJma7W/J/+JO27So7XUTn2vEWdi9apKfhQlyb3NoYz2Q8WuuuWZi3anVSDe1ux2WdesV1wUCgUAg0G8EglD3G/EoLxAIBHqGAMWYn7SNbK0Ia88KG6KMqoR6tLahW2go4PzQ9WWkQCAQCARGIwJBqEdjr0WdA4FAYCIC66+/fo7LTCkejYnKzcfapkbuI+MtUbj5jofv9Hjr+WhvIDC2EAhCPbb6M1oTCAQCgUAgEAgEAoFAINBnBIJQ9xnwKC4QCAQCgUAgEAgEAoFAYGwhEIR6bPVntCYQCAQCgUAgEAgEAoFAoM8IBKHuM+BRXCAQCAQCgUAgEAgEAoHA2EIgCPXY6s9oTSAQCAQCgUAgEAgEAoFAnxEIQt1nwKO4QCAQCAQCgUAgEAgEAoGxhUAQ6rHVn9GaQCAQCAQCgUAgEAgEAoE+IxCEus+AR3GBQCAQCAQCgUAgEAgEAmMLgSDUY6s/ozWBQCAQCAQCgUAgEAgEAn1GIAh1nwGP4nqDwJve9KbkhLV//etf+SfSyERg6qmnTpNPPnn6xz/+MTIrGLUKBAKBEY/AW97ylvTWt741vfDCC+k///nPiK/veKxgzMkpBaEeopF/5plnpvnnnz/997//Tcsuu2z+PRrS1772tTTTTDOl3XbbbUiqO+WUU6Zvfetb6cEHH0zHHXdc2zKOPfbYjN3BBx+czj333Emuddz0rrvumu644460+eabD0ldhzPTn/70p2mOOebIRzLfeOONw1mVrsr+zW9+kxc+a6yxRnriiSe6yituDgQ6ReB3v/tdXtA9+uij6VOf+lSntw/L9Z3YyGGpYAeFLrbYYmnjjTdO7MBFF13U8k5tvuaaa9I///nPtOqqq77huuOPPz4tvfTSye8f/OAHHdRg5F9a2vXwww+nddZZZ+RXuEUNx/qcXKdjglDXQanDaxiHq6++Ok2YMCHf2YwQdphl3y5X76mmmiottdRSQ1LmO97xjvSrX/0qPfPMM+ljH/tY2zK++93v5nocfvjh6Sc/+ckk1/p8p512yjifcMIJQ1LX4cz0Zz/7WXrnO9+ZvvzlL6c//OEPw1mVrso29t/97nenrbbaKqtLkQKBfiGwyiqrpEMPPXRicR/5yEdGxZuSTmxkv7AcbDmbbbZZ2nbbbdNvf/vbbK9bJeqza55//vm08sorv+GyL3zhC0n/mRNcN5YS0Yh49Le//S2ttdZao7ZpY31OrtMxQajroNThNZtuumnabrvt0quvvprVkXvvvTd9/vOfz7lQSXbcccf0xz/+Mc0777xpmmlkqWTcAAAgAElEQVSmSb/+9a/za6yPf/zjWcnef//90yWXXJKv33vvvdNqq62WvPJ6+eWXM3mkfiOju+++e85nnnnmSdNOO22655570le+8pX04osvpuWXXz7tt99+6W1ve1u6+eab05xzzplee+21tOaaa+bPDjzwwExWkX5q8TbbbJP23XffrAj7DOGlKlZfr1GW5XvLLbekD3zgA+n111/P9ZGnOlAXtt5665zfBhtskNVVi4vnnnsuKxTKvPjii7MC7l7qMkO53nrrZbwQ+b///e/pjDPOSGeffXY2nup45513ZrXfK6XrrrsuG2bG9Zvf/GY2rtp56aWX5nvf/OY3ZwLnb+259dZb832Ujfe///25PfLQzhNPPDGdfvrpE3sXhj//+c9zf3FVmG666XKf7LDDDrmO+uDpp5/O/8NaUra6wOz+++/P5B8+MIZNqfezzz6bdtlll3TbbbdlhX7FFVfM/f7Rj340G9Mrr7wyfe9738skmnuEcUPZbUaoW+HVbrx0Mu6Q4A996ENZMdLfMP3FL36R+w+WBx10UFphhRXSZJNNllVnihFFvVm7Pv3pT6fZZ589j3vYNxt3xlo7vPbcc8+sWt10001ZpVIH40n/S96qrL766vlZeuyxxzKJUnfp6KOPzr+/+tWvdvgUx+WjHYGTTz45UUiLHf7Rj340cTx0YssWWGCB9J3vfCfNMMMM2W55ztk2z+lAY5NdY/uNWc+7594bJzbMM7HFFlukWWedNeflLRyb1MxGlr5gg9rZum7mjGp/Tz/99On888/PJA/ZZZfY9bPOOivPX+ph0d/OJsCJbWPX9cF5552X7WMzm8mesuXmuCeffDLbDAtwOLHh3/jGN7ItPuKII/I1g537qm385Cc/mW2ytxjmBnXw1vOoo47K/0t/+ctf0pZbbpleeumltMgii6TDDjsszTjjjLm/1NdcLSHCO++8c8ZKW32njmxSsznFfGtcsKHKXXTRRVsS6lZzTLtxWXd8a5dxp6/U3dxpjmMvtd04MHfOPffceez/9a9/zWP+z3/+c77PHGU8WwQdcMABGT9tdw28vv71r+c58JVXXpk47tvhhf/gN7PMMkueR5dbbrnMZzzLP/7xj1tyF3MIDmIuMH9/+9vfHhbzFYR6CGBniAxMRnj77bfPA9HA+Pe//52++MUvZpIkIbgeqJLK/3yCERmDepNNNslfG1RInoQ0GzweYAlRQXokBtkP0sjoKdPvch0iyTh5tfT4449n4kYR8RC4FnFBDhnSdddddxJCfeqpp+YHv7Hu2ie576677sr1QxD9r4yZZ545/20SUjZsPDg33HBDJp0//OEP8/0ebg+1hDR6GItSXv1OHshucflgnIpqoS5+TGDqYnHje24hPlduwdyDW33wijJU+gPm+lB57mXgGUgGE6GUr9eZvvNZWfT4zsM/33zz5fL0j+/8ppqZZN73vvdN7Hd1MFksvPDCOa9Sfxc0EmpGqhVeJuhW4+W9731v7XFnkbHgggu+YWwh0gzjPvvsk8fuU089lQ2fNiK6iHVjuxCG4vLh72bjzqTZDi/4lHFQHc9rr712XpwZ78aH+hhb1bcfxV1mqN64DIH5iCx7gIDnH1HyrCNIFsEWf0QCqa4tY6/ZlmJL2Vl5IxbGX1n0y7NxbLKthIHG7xAVQsIVV1yRv7vvvvuyuMJGEk++//3vT2IjKbwlFSXX/81sXTdzRhV2Nru4aHi+tbmk6rzlefbMN7MJ1Gm2zvMPG0IJ/JrZTO0uNlx52qbvkDvfVV0+EOrBzn3VNhb1vHzGVrDjH/zgB7OtVg/9/sADD2TyW8aB8i0SJPW6/vrrm9rkCy64IH/vjWx1TiE2XXjhhW+wr80UavyhGV4IbLtxWXd8I77EEKnaz0QwcxXb++EPfzgvYmCjL8vYL2+zS9sK3ylumEQYi0Xke6655spYGifwbDaHwcsC5aqrrprIdcpiuHCiVtyliGzEIAsyAstwpCDUPUbdgKOeFRJtsCIdlDIKSSHUBtVnPvOZ9Mtf/jJ/b7Vn8F577bXZsCIAxf+0uDwceeSRmcgZsB5SRsXAt9L+7Gc/m4ks42x1asAWY8ToGYiMBELtYX7Xu96VrzWIET8+hlaB7Vw+ykNKpTjmmGPyA83QahP1hjKoTBMAFYOy7Ed9LAZMbIx01eXDZxYP5ZWgPJZccsmsRCCIcODygFjCD+lUD5NjI6GGORwQX+UU43jOOefkFTbjcdlll2X1HyFvR6i1kYFA9Kj78tCXDKR7kUt18jcjoTx9SOXeaKONsioheZNAzVAmwu5+7TUJIX5FtTb5m2z0lQmL+iM1Eup2eFmdG3/NxgvFVl51xl0h1BQ1KkuZzCgpcEH+GUuEAabGK+Orbo3tqvpQn3TSSU3HHXceRrQVXsi6cWDCsyg75ZRTkgXCaaedlttLHfIcyAfh8FbEs2QS1zfGhbcVkcYPAsbEXnvtle0EO+L5Mk69TfMWo64tM2Yt7I0vb6K8Sbn88ssn5mV8tRqbCAR7RFzwbLKZyBpC7VnxP5vMHrOb7IO/iRCt3OIKoW5l6wY7Z3jmPWeSOlkoFBtKVfc2yHNf/HyLir7HHnukKaaYoqVNYM+qLh/sbzObueGGG2abIsFZO9lAz7C3Zc0I9WDmvmaEWnsPOeSQbN+RY0TewkT+bI1xQ5lmq8ti/XOf+1xWrtkt8/cyyywzcZ4yH3uzjIQah4VQlzmFDSN8NNqzZoS6FV7GL7LdalwilsSvgeZqIod2I9MWLgQjbxK0mahFVCPeeXv6iU98Is9PxAsku3CF22+/PT8jK6200iT7mszb8rFwJG4h11Rnz2MrvPCbQqjZ8DI3WMBY1JjbWnEXYgps8Zrh8rMPQt3jOaaooR5SrhaUPpN+MUSFUHtwXVvIC4PFgP3+97/PxIqRpuAirAwatdRAdJ1JAplAqJXBPxVx8/rtoYceypOHB96AR2zlJ99CqH3n3uLj7XMqn0FYh1AXn/CigqsroovwFlcR7aK6VFMzQu2B98B6eBDcairqT1mMeJg8tK575JFH3kCoy4MuD8YKMfXgFkxNGAxHIaWtCHVZDcunTFCNw4TyxLjAtvRPuWaJJZbIeOoz30mlTMaVYUE8GSkTdsGuuiHHpI2cNxLqdni1Gy+MpLzqjLsyJi3C1Jeq57WjcecVtrapWzVVCXVpVxU/ebim2bjzSrcdXoiGMQZDdTP5eZNikeOH8oXcS4iGRSlFMtL4RcCY8ErcM2Uh6Jn0rFr0sl+FUA9ky5Bhz3khPxAtJAdJZktbjU3CwHve8568mLPALvYboUa0Sz6ll4gaiBabPxChbmXrBjtnsNuLL774xAHjeTUfFaWS7ShqPTWwiBTmI/i2sgmNhLrY4kabWRYKje0qc1YzQj2Yua8ZoSb6UKstgMxpzRI3G4SzOg7KdWUDORGIAFPyYYvcoy+rc4q5w7ggNFlAlcVfM0LdCq+i1LYal9qDUA80vvWp+nu7Z8EkqS+ba3yX3+Utt+8bCXVZpDZuSjRvWwyV5D55EmlsuG+GF1fSQqjNF8YfMYqbjbc65spW3GUkWLsg1D3uhaI4N2br4fKahiGtS2wKubVa9qAbiFaMVmAeRAOrqLdVQk35QD4LiaeKFDWEQu1aijLllVqK4DFcBr/B3GpTYt1JyOTB1YKRpmB4iJDmZoTaA0+RKP6NDIE6eu1p9VzdlDgQoa5uaKkSahODV5jyQ97Khr9WhLpMIvqwvLbixgMbChXjz5/XJOJB16cmSQaSCwJfMkZKnyP0SHxxA0IIkWk/xdiVtxpl8lBuWaw0Eup2eHmDoe+ajRdqc91xVwg13EyYxhl1wLizSEFUTGbqUshslVBXN+FWFWp+ic3GnQnFc9MKL4vS6jioEmoLD8bZj3FeXp2bKPirRxp/CJj8y5u+xtZ7W8Te1LVlFDJv/uzJoDxKRcX06h65bDU2LTrZWotZb7SolvJgK9hyb1nYYCQBmaWMIqoW2QMR6la2rps5o/gNs0NIX11CTV1uZRMaCXUhR402Ex5sVGO72hHqwcx9zQh1ERl8V1RVb97UhQ+xxTql3MKo7Idia8xx3vxZ3OtLc4U3FnA0f3lDStXVl9U5pRBN5BupLCJcM0LdCi9KL4W81bhEuDsh1OYoXEA7C4k3vj0nnidKvTmPC08joTZf66cqoSa84DvUZIsH15g3CX6waIUXXAuhVh8LkSqhxlVacZeREEktCHUP55uq3xkS6YGS+AV5lVIesrrEprwi9OoIObB6Y3QNar6srQg1V5LiP0j18ErKfcU4IbyzzTZbdrPwKkZ9ygraZwa+SQBxqQ7SupMQQ8ztwSYWKgxyQw3nQqFsn6mL/00u1AcPsgeJcaLQFFW0F4S6LET0B8UKCZbqEGrKrLowrl5d+bv4AcKZ6mR1z0/MgqSo0oVIen1L2WVAJK/RkNQqoa4Sd/0l2QQkNRLqEmKpGV5ecTNIzcaL/q877gqhLm9Z9IE+sejRRqTVK1/k3RsDSb8h9I3tqhJqLkWtxl07vIovfXFlqRJqdaCQWDya0CzIjF84G38mS8nCJtL4QIDt8YbCM4sYSUiR8VOeQcSnDuGQl8Ve2R+CYHk2C6lojERUHZtUTTbfQhHxKc80m4dUsX/yYYeQfH6m7K59JVUbiXiV1CwaRlU8GOycQaippjKXDaRQw9cz18omWCxb4BNXYK6dzWwmn9fGKB/a1SmhHmjuG4hQFwHFJj3kDylka7k4EDnYQX3DDuoLb1bNnQisuRIhZ+t9xxUCD2gk1Igiccn15nU2k31uRqjNFc3wKu6ircZl3bm6KNRwKRsTkVZtVk8LU8+OdiqTcFGHUJtrik88Mcni0gZ8dtr80govKnQ7Qk2sajWHeOvhGff22v6G4UhBqHuIOh8rK+/GmKdcMSgVPqcsG2xWgXzLuG4stNBCmWQ1unzwNfO9AVRSecXjVaOH1Ss+CgpDRRlEIpAJq2flIH/KZcyLTzE1kXErGwA9PF7T87suftrKYzR8V1J5SKktDH8rlw8PHsJVXhMxpl4duZ5xLUarqBFUoKp7iP89GCYWiwh+1RQTfmwMnAnIg4lo8SfXTkpBK9XGJMidhTGAAcJJFa7u+tfGsimxqibAz6TJ6JWNOWXzBEyL3677qbfIO3JIcTK58eeVTKoIpbzK5r2Co++NDwsvZbhWHfVPszjUrfBqN16Kq1GdcVcIdXUjKLwpEMZOqafJDp5ILXXe5AmnaruqhBqBaTXu2uHVOA6KSm9MUPMQjrJIMkkZCyVEl+cFnsZRpPGBQHFHsHiv+s6XN1XeUFHNjMeBbJnn0qYwdqu4yBnzxhfRot3YZLcQZ4tcY5CftDdYReUse0JKr/DtpuiyuY02slxT4jW3snXdzBntCDU75A1f2etS3ZdSIlOxXY02wZssbmrmAnMV29HMZpobmtnwQqidWWBRTHwprgODmfuaEepiE31nP5FFEX/cYtONIWPHfGNuLYmri7Gh/dV50/dEFu476t9IqH3P9cgCSirzY9nsV61juzmm3bisO1cXQq2e+s+P9rDnnh8iTQnlZ14kDEqUdxyA7S8KdQkkUDYl4kO4QJk3jVnPDXW9FV7EJ8KVcgr/KAq1hZuFXqs5pHCg6ubjflu8INT9RnwQ5XmoEE4r3jonzpW4v9RRhshrRg8Gg03NK0m+Bm4JAVc+R0A9KEUtHUSV8y3IFeW22UmGVqxW5YyJpC4mOQ8blbeXyapX3ghYCf1kYqNilfBqA5WHWPPDpOhX68dYeN3JFcQrscbXThZL2mpiqS5OmpVn0oEZn76BTn9sh1en46WxLoVQ2/T5pz/9aWLUl3KdemoXwzmY12ytxp38O8GrWm/4cw2B82DqNFD/x/fjGwFjXgg+ZE6o0jpjzOLTD+KHkBEJqH7FlQqiFoIi93iT0vjMN9rITnqgWxvQSVmuHcgmIEKws2F8IJvZadnV6+vOfQOVYQ5EeNm4auhYfUKBNq81HlRFpGG/KdvI8UDJnCRRhtuldngNZlxWyyIycU8UGQNJ5n6kbdW5ChbGqYVgp0n9EOO77767a7yqZbebQzqtYy+vD0LdSzRHSF4eQJtpKJzVUDgUaKrpeEslLrh2U4ooTeU1XtVYjjdcWrW3SqgpzJECgUCgcwQQJuose1PsMPvjjdpoPv20cyT6d0fMfZ1hXSXUwxVqrrMaj+yrg1CP7P4ZdO1sBvD60E5zh4rY8NHu6NdBFzRKbixRJmxo4MPID72O2j9KmtfTanpV6fWqDTMDqSc9LTgyCwTGGAJcmbwhtCmbjyx3NW+gIg0dAjH31ceWqs7tyVvqcghW/bvjykYEglDHmAgEAoFAIBAIBAKBQCAQCAS6QCAIdRfgxa2BQCAQCAQCgUAgEAgEAoFAEOoYA4FAIBAIBAKBQCAQCAQCgUAXCASh7gK8uDUQCAQCgUAgEAgEAoFAIBAIQh1jIBAIBAKBQCAQCAQCgUAgEOgCgSDUXYAXtwYCgUAgEAgEAoFAIBAIBAJBqGMMBAKBQCAQCAQCgUAgEAgEAl0gEIS6C/Di1kAgEAgEAoFAIBAIBAKBQCAIdYyBQCAQCAQCgUAgEAgEAoFAoAsEglB3AV7cGggEAoFAIBAIBAKBQCAQCAShjjEQCAQCgUAgEAgEAoFAIBAIdIFAEOouwItbA4FeI7DKLqukv/zlL73ONvIb5wgsPdnS6ZxzzhnnKETzRysCxxxwQPrtrbeO1upHvUcwAr20i0GoR3BHj/eqrbLKKmnWWWedBIaHH344XXPNNaMGmoUXXjhNP/306dprr61V55s+eVOt6+KiQKATBA6e8uAg1J0ANoKvHY928cZLLklLffzjfe+Vz3zmM8Py3ES5/enqXuMchLo//RalDAKBiy++OE2YMCH99a9/nXj3bbfdlo499thB5PZ/t3z9619Pb3nLW9J+++3XVT7l5t/+9rfJJPef//znDfnttddeab755kubbLJJrbKCUNeCKS7qEIEg1B0CNoIvH492MQh1fwZkrwlm3VqPlXKDUNft8biu7wiYOC655JL0ne98Z5KyEeIPf/jDac0110wbb7xx2nDDDdMaa6yR9txzz/Sxj30sX+veb33rW5nMfve7303TTDNNeuSRR9Jhhx2W80PUjz766HTaaafl6xdddNF01FFHpX/9619pxhlnTFdffXVabLHF0nTTTZcuuuiitM8++6R11103ffWrX01TTjlluu+++3K57l9ooYXS008/ndZaa6106qmnpnnmmSc9++yzadddd02f/OQn08orr5ze/OY3pymmmCKdffbZ6YgjjmiJZRDqvg+zcVFgEOqx083j0S4Goe7P+B0rxLYuWr1ubxDqusjHdX1HwMSBhD7xxBMTyz7hhBPSTTfdlC677LJMmldfffX0gx/8IP3iF79IF154Ydpuu+3Sf//733TcccellVZaKX3729/OBHjffffNxPr8889P73//+7NC/ZWvfCUTaOkDH/hAkrc8fPbZz342E+kHH3wwbb311mmZZZbJJPvcc89NZ511Vs7nwAMPzK4cv/71rzOZXnHFFfO1m2++edptt93SVFNNle699978HUL+vve9Ly8CPvShDwWh7vtoGt8FBqEeO/0/Hu1iEOr+jN9eE8y6tR4r5QahrtvjcV3fETBxvPjii+nOO++cWDZC6/8NNtgg7bTTTunxxx9Pn/jEJ9Kb3vSmtNRSS2WFesEFF8w/yKvrv/a1r6Xf//736T3veU8m6FtuuWUm2Qh1SQj18ccfn5Zbbrk0++yzZyUZ8XXdb37zm5z3u9/97rTeeuuleeedNxPwE088MZ100knpxhtvzNf6//7778+uJNNOO21WvbmCLL/88mm11VZLM888cybp8mqVQqHu+zAbFwUGoR473Twe7WIQ6v6M37FCbOui1ev2BqGui3xc13cEWr3aVJG111477bHHHtm/2t/ILtUY2b7llluyKwh3i5/97GfZNePPf/5zdt+gTHMZaUaouYIgxnPNNVcm1EsvvXR6+9vfngm1v/lK//Of/0y/+93v0kc/+tF0+umnT0KoL7jggvSrX/0qu44ox4bKTTfddKIPtc2J1OxCqLfaaqvkp5om7Duh7zhHgWMfgSDUY6ePx6NdTE89FZsS+zCEe00w61Z5rJQbhLpuj8d1fUfAxMG145hjjplYNncOZNXnV155ZSa2Rx55ZFaE119//eyv/OUvfzl98YtfTJ/61Keym8cf/vCH7E/985//PBNahHnqqafO7hlVhbodoZY3kk1xfte73pV9pynSfijU3Et23nnn7Hft2m9+85uZSN99990tCXUzQEOh7vswGxcFBqEeO908Hu1iKNT9Gb9jhdjWRavX7Q1CXRf5uK7vCHCP4CZRTU899VT2S15kkUXSRz7ykbTLLrtkhfpzn/tc+slPfpJeffXV9Prrr2cXEGr1T3/607xZEQnnPvLxj388+0fbXIhAU5klLhztCDVV2USGuEvPP/98Vrn5TVOulct3GoGfYYYZsh+3MpRXonw0KtRBqPs+pMZtgUGox07Xj0e7GIS6P+O31wSzbq3HSrlBqOv2eFw34hFAovk3c+9AoN/2trelZ555Jteb+8Vjjz02sQ1cOV566aWmoe7aNRQ5fuCBB/J973znO9Ojjz6ay3rrW9+a/vGPfzQtqxPgQqHuBK24ti4CQajrIjX2rhsLdjEIdX/G5VghtnXR6nV7g1DXRT6uCwT6gEAQ6j6APA6LCEI9Djt9DDU5CHV/OrPXBLNurcdKuUGo6/Z4XBcI9AGBrb63VQ7H1++05JJL5nCE/U5Rbn8Q//LSXx6WE9/607ooZawjcKNIS6us0vdmjhWiVxe4aG9dpJpfF4S6O/zi7kCgpwjY4NgurF5PC6tkFoZ0qJCdNN/xhnN/UI1SxjoCN557blpqvfX63szx9rxGe7sbYkGou8NvUHeLoSzEWzlUZFCZdHjTAgsskGyKE4+5WRKlgu8xn+CSxGT++9//nmMr9zKJ5ewgloHaX+psc6HoGkLgDZRsGrRRsM61A+VV93s+isp1OmKr5PRF/tw2K7ZLQajrot7ddTFxdIffUNwddjHsYqtxdeN3v5uW+tKXhmLYtc0z7ER/IB8rOAeh7s94maQUpGmdddZJDz/8cN9KFwljzjnnzDGSmyUh5c4888z04x//eOLX5513XrrqqqvyEd29TE42dNhK2TDYKu9SZ3Ggv/e976UPfvCDA1bD4S3ydmjLGWeckX74wx/m2NBDlYTHE8nDBkfE+gtf+EI+XbEkdf7GN76RI48g3SKFHHzwwS2rE4R6qHpq0nzHigGvi9Zwtbdu/VwXdjHsYhDq/yEwXM9rlNuJxXrjtUGou8NvUHeXicOpfWIsv+Md78jqJjImPrLwbcKwUYaRWaHYqMsOFRE7+S9/+csbyp1//vnT97///RwPWRxkhNLflF1xmcVfdg0ye8QRR+T8haSjSm+00UY5RjNCffnll+fffpxAiFAfe+yxuZ5cEV555ZV0wAEHpEsvvTTHgr7vvvsyUXcEeLOE3Fo8IJviODsK3H2bbLJJ2n///XN0DPcLaQcXcaSRU3VyMIs6n3rqqRMJ9brrrpvD0QlZp2yHtjgcxX3iQysDwb3iiivS5z//+fTaa6+l22+/PT355JOZ2Ir8cckll2SMihp/6KGHpoceeigf0vLvf/+7dp+K7nHdddelNdZYIx+PbjHy9NNPp2233XaSRYmF04477pgPh3Ea44c//OHcxmYpCHVt+Lu6MCaOruAbkpvDLoZdbGkXQ6EekmeuMdOwi93BHIS6O/wGdXeZOPbdd9802WSTZUUVYf3b3/6WySPSe8cdd+TNab6baaaZ8sEkrncy4DbbbNO03Ouvvz6TZwQT+d1uu+1yjGYkkuuG47gp1CeccEImzH/605/yMdnIJRLvpD9E9p577klf+tKXUlGouVwgv04YdFjKCiuskNViJwYi+erpkJXGJDQdgu6QFScZIrROIrzhhhsmlmkx4cRD+SPd4koXwo6wqvMhhxwykVBfffXV+Tjxs846K7vNKFtMaobA/+rkfge+WCSU2NQWK+qNyG+xxRaZ1JakzeU48muvvTYvOPSFUw5tmqsmffejH/0of/T+978/L3aWXXbZ/D9iLmyfBURJvhdiD8lea6210l577ZUXATAOQh1KzKAMyCBuGq6JspOqhl0Mu9jSLgah7uRRGvS1w2Unxkq5QagHPfQGf2OZOLg8OGQEIaReUlwpuCeffHImrIjYNddck0kmooi4+R4pbZYQTT7SFFMrfeopEkl5VUYh1Mh7IYEIKrKOfM8yyyzZx1f+fhdCTSX2oy4SUo1sIv677rprVrGbJQTZdy+//HJWhRFRB7NUCTVSv9tuu2WCL070TjvtlE8fpFjDp5FQI+Z8sBFXh7G4FilHkCnq1QNauHpQ7flrq4fDX7T1kUceyfVuTBR9C4l55pknvx1ArhdeeOFJLnPqov6RqP/Ie3FFsWCwkKkS6uWXXz6f5EgN90Zg8sknTx/72MdauruEQj3456qTO8eKAa/b5uFqb936uS7s4v8R6rCLk46c8KHu5Eka/LXDZSfGSrlBqAc/9mrfyRUBkaOSFjcBaiV1lc8vojr33HPnTXoIc/EXRkipzgYbv1xE9/DDD29JqB19jZgjnfLgMjH77LNndwjfFUJdTgTUAKf8UYcRan6+SB9yy9WkEGqkkP8vpbokKq5TCG0kauaCUq7TPiR1mWWWyW230RHpL6r4L3/5y0yK+TubRODUilC7V32p4hYYVGgnHSLU1GIuHs0INTcQ6vXNN9+cMaFGcwNpTPBG1tV3mmmmSbvvvnt2jakmSj83GYkCz78b5hYgMJFgXU0wdYqi9iHdsJC8SfDTmCLKR+1Ha9AXjhUDXheA4Wpvu/qFXQy7WHhZ/mwAACAASURBVNsuin4UmxLrPu6Dvm647MRYKTcI9aCHXv0bnagn6gTlePHFF88EFvFDCpFbyjKXCcQUKatuwHMfAop882VG4rgtNEuUVa4Q1FBEjTJL5aWIcjUYiFAj0ogkF43VVlstK7GUXUT/05/+dFam5YHwOvabn3I7Qo1MU4j5NCOc2su94rTTThsUoeYqgRxThk3G8lEX/uWtCDUXGu3SJkQaQabaV5MIHPyytYnPNdyOO+647MKhv6pJrOaqj7S3APqsuKLAzEZIxB35R8otarR77733zi4kcGyVQqGu/1x1c+VYMeB1MRiu9rarX9jFsIu17WK4fNR91Lu6brjsxFgpNwh1V8Ov/s0XXnhhPqp6woQJ+QANmwsPO+yw7KqAxHGFcDw2P+k999xzohvBmmuumVVNvtbu5VONqLVKCJ5Nf6JPUL798H2uQ6ipxHyOqa5UWL7biD7iipRSgt/85jfnDY6IIvWcKtwurJ4FgXbZzPj4449nBbjq8tFMobag4PJho2ajy4coGdRy6fnnn8+bEy+66KK02GKLZXJfVajlgxDzI6dqWwDw6YZHNVGvJeRfXp0kPuvePEjahyxbPHAN2WGHHXIdix+1TagWQ638BOURhLoT9Ad/7Vgx4HURGK72DlS/sIthF2vZxSDUAz1KPfl+uOzEWCm3JaGeb775MhGgWPI55X9KaWsWcQIBo4q6h8rKdcDfIifwEUbIkEGKpSgK1MBbb7017bPPPlk9lRAlr/v55Xr1jkghJsjkHHPMkf1SESMJAaLqInmNyYY9CiTy6kHltkBpbBYdwrWrrrpqolAiiiJXUGbVVV2QK6/zbYoTjeKPf/xjVn47iQRRrR9XDH7R1VjPlNYXXnghEz6uAYh1s1jF73vf+7LLgLKt6meYYYZJmo64wW4oEwWcP3S1/srjyoFYNib1ham6i4LBR7oXydh64IEHMpb6ubE+1TJgqmwbM41T48DGzmpS/8H2qXymm266vPi49957WzaPD7vxE3GoJ4VorBjSuuN6vLW3Di5hF8MuDmgXg1DXeZS6vma82adet7cloabuUf8opkg0Aut1dbOIExRHPr5ev4uxi0gjoV6be9XO71fimyqig41ZVELqBN9dD5NrbQpDkr3CF/ZNBAruBlRZSie11rWIbglV1jiCKLFUQgoqn1sEme9xs+gQ1Eu+zNRcJNXGO39zb0BYkS9EHxFSplf2/h+Oo6Gr7eTyYKFTTYjuUMZbbvekWozYENiYEFd+y8OdvAUwLi3ijKuRnEKh7k/v9NqQ1q31eCu3Li69uC7sYmcojiq7GIS6s84d5NXjzT71ur0tCTVXAUSW/y5lmdorWkSziBMINTcFhI6fbFF3uSZwa0CakWekeaGFFkqnnHJKJrk2X1GoqdF+EGZEHvlFwr1Kt3mND668+bYi1PxTEeVmCaGmiAKK8iy6hHKaRYfgioB0y18d5as+yPVmm22WVXGqpjwomNwJEHx5Rxo9CFgYWiAJhTfSUxDq/vRQrw1p3VqPt3Lr4hLX9R+BUWUX4+jxvgyQ8Wafet3epoS6RJewCYwvrVfaCDVC2yziBEJN1fXqnQ/v9ttvn31Iq4S6bA6zeQwxp/5y3RA54pZbbkkbb7zxREJd/Gp9htwizxRn7hfcSfjuUsBbEWouAXxV+drymXVfs+gQCHW5lvsHMk2xLoQayeauUg0Ld/fdd08Mm9aXER6FjCsEglD3p7t7bUjr1nq8lVsXl7guEGiHwI1bbZUOOPDAjkG6bPrp0/OTTdbxfeWG8fa8RnsHPVTyjS0Vai4WSDG12QEZNqKJgtAs4kQdQs3n1Ua8gw46KIeIQ1Kpz0K68b/lGsJVw6EjzQg1X2MHj0hIbyu/WQp1I6FWTrPoEOIutyPU2muBwNfbRj8LARvP+HV3k2yes5igdvcrLbDAAtmVRv81S9xtLJ6quFLvhYlrt+lwMPW3MdHibKD2lzpbdHmda0wOlCyiVlxxxVrXDpRX3e8tQJVrE2WrBHsbS/nIt504hIdaaqm6RffsujCkPYOybUbjDedOUA27GHax1Xi58aabOhlKE6/9zCKLpAemnHJQ97ppvD2v0d5BD5X2hJoqzI3Dhi0RF7hFUHCbRZxoR6htCLQJUbg36jJSIfqBQ0wQJGHMfI6UIE6zzTbbJISaSq5sidsHl4/yf7OmNyPUCEqz6BBIcitCTeHWXj63JUydsm2ybNzU1mkXlAMMHEndr2RDpyO+nZTYLJWjxx2fXVKJQy0mdS+TzZPciYT0a5dKnbndVEMJtruHe5K89ZuoJULYDaVvOZ9749FBOsYwNyX7CUqyGRKm3vBI9hcI39dy4ghC3cuh1jKvmDj6AnNHhYRdDLsYhPp/CIR96sh0DPriXuM8YNg8bhGN0RmqESfqtAShsWHxtttumxidwX0IiBPvKKOIO/LRjmSJGCHOMkJGsXzve9/7huL5Zz/33HNNq9VJdIhqBuomSoP6D7QbuQ4eZeJAspxaKBwddRMZm3rqqXNsagsNyjAy2yyySmM5NlQK+0ZppsgjlP6GkzcA3jC4BnbiJstfBAzYc3EphJqLjI2hfmw09CZB/Gv1tDAR/q4cDW5BYmGEqK+++upNm47c8kXX194SOLjGfRYp4nGLnuJ+iza4eGOBnKoTn3Z15i5UCHWzaC0ir7jPWwxlILhC5AnpJyShQ1yefPLJvBg0xvjEw6io8RZ9iK43EJ1E+yiH9JQNsojz008/PUmcags841R5MOBa5C2MCCXNUrh81HmCur+m14a0bo3GW7l1cXFd2MWwiy3tYijUnTxKg752vNmnXrd3QEI96J6p3FgItUghg018oJFAp+MhtUg915HGhDgiZCM5lYlDBBWKPdKJsNo0hzwivXfccUeOJuK7ZpFVmrWPbznyjGAiv3zAqbxIJNeNcrALQocwC2snkko5tZBLDSIrTrKILEWh9uYA+RWZhVuOsISO27YBVbg/9RSvujF5K4Ggc+exKRShtRm1GofaYkLkF/kjnDa9FsKOsDbGoW4WrcVix4PBjUad3G+cWCSIq23cWKyoNyK/xRZb5PjfJWkz1dgmHceNW3DoC8eri2xTTfrOEeqS/QAWO+UYd8TcArF69LhFzlxzzZXf0CDrFnzl6PIg1KHE9MtO9XriGIp6h138v6PHwy5OOsLC5WMonrg35jlcdmKslNsXQi3aBtW4E/WvsavFQG53xHV/hltvSikTBzWeCwlCaOMktZeCi3AhrFbroq00i6zSrCYWLHykKaYWFfy/kUjKqzIKoUbeCwlEUPnJI998yi1WkF6/C6GmEvtRFwmpRjYR/1133XWSTZvVeiHIFG4h/ajCiCgf4iqhRup32223TPC9Cdlpp51aHj0Ok2bRWkw+CDJFvdnR4/y11UMsdW0VV129GxNF30LCWPN2ALleeOGFJ7nMvoJCiKn/yLt6SRYMFjJVQu14dm5MO+64Y44ow3C4vtWbjlCoe/OMDZTLWDHgA7WzfD9c7a1bP9eFXfw/Qh12MQj1Oeec08nj05Nrh8tOjJVy+0Koe9LTozgTrgiIHJW0uAnYWEnZ5fOLqFLbbdJDqIt7Q4m20iyySjM4dt5550zMkU55cJlwGA93CN8NdPQ4X3ZuIMgtV5NCqJdffvm86Y5SXRIVF1lsd/S4a7UPSV1mmWVy25FKpL+o4s1OSuQexAXEgqOqULu3WbQWhLrV0eMUYm4g1GsxsWFCjeYG0pjgbcOk+orzLbIMdbuaKP3cZCSqM/9umCPIMJFgXRI/bgvJzTffPH9kMcHtAzH3JsFPY4pNiUP/sI8VA14XqeFqb7v6hV0Mu1jbLk6YUHeoT3JdbErsDLbhshNjpdwg1J2Nt0Fd7TQ/GzApxw6T4d5ASeWewF+assxlggKPlFU34LmvWWSVZhWhrPIx5xuMqFFmkVKbK+scPY5II5JcNGxKpcRSdhF9YREp0wguwisCCj/ldoQamUZo+TQjnNrLvUIowsEQahtUm0VrEUGjFaEu4Rq1CZFGkKn21eQNio2z2sTnGm5ipnPh0F/V5Nj4bbfdduJH3gLos+KKAjMbIRF35B95hiUfc1FL9K03AOFD/T8Ix4ohrWsYxlt72+ESdjHsYm27GD7UdU1MV9eNN/vU6/YGoe5q+NW/2cE2JhDHmiNlQgg69IarAhLHFcIGUH7VoqsUNwKH3TSLrNKqZATPpj/RJyjffvg+1yHU1FQ+x1RXKizfbUQfAUa2KcFOReSnTr3ls23jX7uwehYE2mUzo5CDFOCqy0czhRrppFDbqNnoQ90sWotTLBdbbLFM7qsuH/JBiPmR8/W2AODTDY9qol5LyL+8Okl81r15kLTPosPigQItmo3TGRHsOeaYY6IbTTk5tFk54fLRCfqDv7bXhrRuTcZbuQPhEnYx7GJxL2xrF4NQD/Qo9eT78Wafet3eINQ9GYb1MuGKQZmsxnr22vOFF17IhI+7BWLdzL+2GlmFyuvkv2p6/vnns0/zUCYKOH/oxhjgXDkQy8bED1BkFnV/4okn3hAtZrB17SRaC0yVbWOmhYIoIY1hD9W/G/9+Bx9ZfNx7770tm2QxpR6tlOlyYxDqwY6Kzu7rtSGtW/p4K7cOLmEXH6sD04DXjGm7GIR6wP7vxQXjzT71ur1BqHsxCvuch81wXDOqCdEdynjL7ZpItbYhsDEhrvyWhzt5C3DwwQdntVg4wZGcglD3p3d6bUjr1nq8lVsXl15cF3axMxRHlV0MQt1Z5w7y6vFmn3rd3iDUgxx4cdvoQUA4PIq+UHgjPQWh7k8P9dqQ1q31eCu3Li5xXf8RGFV2MY4e78sAGW/2qdftDULdl2EahQQC9RAIQl0Pp26v6rUhrVuf8VZuXVziukCgHQI3DpJQd4uqcwjseep3inLrI/78ZJOly6afvv4NlSt7bY+DUA+qG8bGTcLEic0sQkXxK+aHJ+qFg2W8QrWpsJNkY6BNfiKDjOYkTOCKK67Ycfu7bXMQ6m4RrHd/rw1pvVLHX1STuriMpOvCLrbujWGzi8NAakfSmIy6tEbggSmnTMIjDib1eh4IQj2YXhgj99igJ1oHP+cSD9khK0LQHXTQQZOE72vXZFEzbOxz6qIIHkLsNW5cHG2QCfnnlMp++1wHoe7PSOm1Ia1b6/FWbl1cRtJ1YRdb98aw2cUg1CPpERlRdQlCPaK6Y/xWRvxrod4cJFJO7xuIUAvHVzYgCjHnR8xm4QAdBuPExYceeijNOeec6R//+EcOZSdyCcK+2Wab5euo3gi7Y7snn3zyfIqjDTIl0gaV3Hc2XjrVkIrugBqfiTktIoqwg1NNNVU66qijshpOVae0C58n6oZ67bPPPkkoQGWaCOQv3rTNib4Tn1sSis/BO9X6ONxFvHBhCxFrpx9SrsTBdvhOCWeo/uKHO9JcnGsxrkX0mGKKKfLBOGJZN7u/1agLQt2f53G8Edvham9/erO3pYRdHIF2MQh1bwf5GMotCPUY6szR2hTEVEg7h5kgn4ipiaQdoRaH2hHiRx55ZCa7W2yxRT6sBvGkUFNzHWby8MMPZyIprqiDZhx9Lh62MpBgZFWYPeq4sIGIrANYShI/2oYZMbl9p158yoSmQ3x9zkXFPSeccEISyxapdoy7tjz44IOZcDudsUQ+UeYee+yRCTxF3T3IvxCFFgSOHXc0eqnPXXfdlQ/dcSiN2NUOhhHeS0xwB7Nopx+h+Cjz2rbQQgvlONTa5VRHB7q0uj8I9f8QGC6iF+WOVss1tPUOuzhC7WIQ6qEd+KM49yDUo7jzxkrVEWdKs0NHHHwi7B6S0Y5QU5kps0ijRGlGYOeff/5MgBFqLh8OkUHWEV4KuM922WWXiYemUIYR2mWXXTb98Y9/zAfZVBPyTdV1QiRlmdqLlMvDaYvuoxJvv/326fjjj8/klYJNPUZ21QXRpbwj1MpCmhHeU045JV/jO/VwcIwf7XJNqU85IMax606LhI/FBNKtPo5dd+S4RLWnqjsMR8xtOFLMXe+axvsp9kGog1D305YM1wKin23sRVlhF0eoXQxC3YvhPSbzCEI9Jrt1dDUK4XTIiBMMuUTMMsssadVVV83KcSsfakr03HPPnRwBLv385z/PxBnprRLq4kPtGO6nn3564gmJTnEsyQmJjuVGNrmKlMStwgmMykBQuW8gsI4DRwqozwgx9RuhpiL7f6655sqEGoF9+9vfPgmhLsePaxdizkfSxsk777wz3XLLLWnjjTeeSKhLfaonLmofhZviTYmnUh9wwAG5XIn7iEUFMo5Yc/+weYeyjbg33m8BQBm3QCm+69XR455+p+EiXFFuf3p6uHDuT+t6V0rYxRFqFydM6F0nR05jCoEg1GOqO0dfY/gZO2YbsSt+y9wXzjnnnPTWt761JaFGVrk2UHPFdT799NPzUeI77rhjmnrqqbObRXVTYiHUxxxzTHbbWH311TOJR2YRYj7HjYQampRvR3dz70DaHXXONYVifeCBB6Zjjz02k2b51iHUDzzwQK4bv20LAmWuv/76aeWVV84LCO4cVHf5NRJqp1LCiusGEkwhR8Ap1RR5p0Fqj/ZZlDQSauU03i/Pe+65p+nACR/q/jxPw0Uwx1u5/enN3pQSdnEE28VQqHszyMdgLkGox2CnjqYmIaT8kfkcl8SFYuGFF05XXHFF3tjnZEG+zDYrVtOZZ56Z5p133qxq33fffenzn/98Jpg2+yG3flcVau4NXqPyhV588cXzxr7HHnssrbXWWpnAX3nllZnIVhPfY24g1OAXX3wxk1nuGZThySabLJfNJYQbRh1CjXy//vrr+d4ddtgh1xtpf/XVV/Pn6kSthkmpT1Whdu2ss86a1XwuLBYR6qIN6sJ/W5sp7s0U6mb3txovQaj78ySNN2I7XO3tT2/2ppSwiyPYLgah7s0gH4O5BKEeg506npokigVyWmJXazvS+tJLL2UFulWaeeaZc5SOVups431ILPJdTXyUqcJFWR8Idz7UJ554YrrtttsSpbrUD4m2MOBWgri/7W1vS88880zL7JT7xBNPTFIfaj7Fm6sH9bpdanZ/s+uDUA/Uo735frgI5ngrtze9NTpyCbv4v34aErsYhHp0PATDUMsg1MMAehQ5PhEohJpLxmhIQaj700vjjdgOV3v705tRSqcIjDq7GIS60y4eN9cHoR43XR0NHW4ERNt47rnnaivaw13fINT96YHhIpjjrdz+9GaU0ikCo84uxtHjnXbxoK4fjUeex9Hjg+rquCkQGPsI2PR57bXXjv2GRgv7joA9C5ECgdGIQNjF0dhro6POvbSLcfT46OjzqGUgEAgEAoFAIBAIBAKBwAhFIAj1CO2YqFYgEAgEAoFAIBAIBAKBwOhAIAj16OinqGUgEAgEAoFAIBAIBAKBwAhFIAj1CO2YqFYgEAgEAoFAIBAIBAKBwOhAIAj16OinqGUgEAgEAoFAIBAIBAKBwAhFIAj1CO2YqFYgEAgEAoFAIBAIBAKBwOhAIAj16OinqGUgEAgEAoFAIBAIBAKBwAhFIAj1CO2YqFYgEAgEAoFAIBAIBAKBwOhAIAj16OinqGUgEAgEAoFAIBAIBAKBwAhFIAj1CO2YqFYgEAgEAoFAIBAIBAKBwOhAIAj16OinqGUgEAgEAoFAIBAIBAKBwAhFIAj1CO2YqFYgEAgEAoFAIBAIBAKBwOhAIAj16OinqGUgEAgEAoFAIBAIBAKBwAhFIAj1CO2YqFYgEAgEAoFAIBAIBAKBwOhAIAj16OinqGUgEAgEAoFAIBAIBAKBwAhFIAj1CO2YqFYgEAgEAoFAIBAIBAKBwOhAIAj16OinqGUgEAgEAoFAIBAIBAKBwAhFIAj1CO2YqFYgEAgEAoFAIBAIBAKBwOhAIAj16OinqGUgEAgEAoFAIBAIBAKBwAhFIAj1CO2YqFYgEAgEAoFAIBAIBAKBwOhAIAj16OinqGUgEAgEAoFAIBAIBAKBwAhFIAj1CO2YqNb4RGCVXVZJf/nLX8Zn40dYq+d50zxphgkzjLBaDb4655xzzuBvjjsDgWFE4Kgjj0zXXnfdMNYgih6rCPTSLgahHqujpEm73vGOd6SPf/zjb/jmF7/4RXruuedGDRIbbLBBOv/889O//vWvEVvnJZZYIr322mvptttu66iON33ypo6uj4uHDoG5b547veNv7+hpAZ/5zGdSLw143coNV7l16zec14Vd7B/6g7WLN95wQ1pq6aX7V9H/X9JwPTdRbn+6utc4B6HuT7+NiFJWWWWVdMghh6Rbb711kvrsvffe6dFHH+2qjr/97W+T/P/zn/90lY+bv/71r6e3vOUtab/99mua1w033JA+/elPd13nVhX98Ic/nL7yla+kz33uc4Nuy1FHHZX+/e9/p1133bWjPIJQdwTXkF4chHpI4R0xmYddrNcVw2kXg1DX66Nur+o1waxbn7FSbhDquj0+Bq4zcRxwwAHpgx/84CStedvb3pZ+/vOfp5NOOin95Cc/SRdddFFWgK+44op0zDHHJArOs88+m77whS+kv/3tb5noUrpfeeWV9P3vfz995CMfSQsttFB6+umn02qrrTYx72222SatuuqqacYZZ0xvfvOb06WXXpq/nzBhQvrWt76Vy9lnn33Sxz72sXzPxRdfnC655JJ03HHH5WuOPvrorJzvtttu6U1velO65ppr0te+9rWEUD/00ENpzjnnTP/4xz8Sxfqpp56aWK7rl1pqqTTbbLPlck844YT0wx/+MK255prpG9/4Rpp88smzW8Umm2yS6/bd7343zTrrrLmNm266aW47Qn/dddfl9vt+mmmmSY888kgm2bPPPnv+bPrpp0///Oc/09Zbb53zW3vttdMuu+yS/vvf/2Zsbrrpprw4kIf6+Az+cGiVglCPnActCPXI6YuhrEnYxZFvF4NQD+UT8H95jxViWxetXrc3CHVd5MfAdUWJqfroPvHEE+mrX/1q2nPPPdMnPvGJdNlll6WVVlopE+Hvfe97abLJJsskFilEpvfff/9MerfffvtMorfYYot8369//eu01lpr5WtK2muvvfJn3/zmN9NGG22U5p9//vw3dXmGGWZI2223XbrwwgvzbyQUkUbOqegILZX4V7/6VTr77LMzuUWMP//5z2fS//DDD6dDDz00HX744emss87K95b07W9/Oy2//PLpS1/6UiayiLP/qeh+fvOb3+RFARXZK8h555037bDDDrmNyDq1fsstt0yf/OQncxlTTjll2nfffTOJPvfcc9OSSy6ZZpppprwo8Plf//rXZPGgjldffXW69tprM55XXnllxgpmiPWnPvWptMIKK+QFjfY2S0GoR86DFoR65PTFUNYk7OLIt4tBqIfyCQhC3St0g1D3CslRkE+ZOH75y19OrC1l99hjj80KMEV66qmnzm4KSCfl+rOf/WxaZJFF0tJLL53uu+++TBYRTeRQoi4j4Yjohz70oezmUCXUyy23XFazkWb3uR653myzzfLfCK/fCy64YP6hIisfiUVEL7jggly2hPxSifl8I+uU3lNPPTU9/vjjWcWuEupZZpklbbjhhvmj3//+91k5PuKIIybmhdjPN998uc1f/vKX0x/+8IdMlrVbHXbaaafswlJUcXm85z3vSVNMMUVW8uX3u9/9Li277LIJgadsn3nmmWmZZZbJZJmf7AMPPJDe+ta35oWEfCS4ufauu+5KW221Vf6ppgn7ThgFI2l8VDEI9fjo57CLI98upv/+N3yo+/A49lqxrVvlsVJuEOq6PT4Grmv1arM07fLLL0/TTjttJr/IIjcJJBIZnHvuufMmQOqtz4p/8TzzzJOJYytCjbRSiCm4iDPFuhBqLiTcK+688850yy23pI033ngSQk2ZPuiggya6qLzzne/MLhZIcPGhPvnkk7OrSdVXGcFFZCnU0vXXX59OP/30rG4j/RJXE0SXu4j6INHa4joKPkJNLXcvYv7nP/85TTfddJlQI/QMwIMPPpjzo5JTpKvuNIg+9X+uuebKmGpjSYh9K5/1UKhHzoMWhHrk9MVQ1iTs4si3i6FQD+UT8H95jxViWxetXrc3CHVd5MfAdWXiWHHFFSdpDVWZ4soH+Pbbb08LLLBAdvmgRn/nO9/Jyi33BUST2wPCys2Dy8dhhx2W3SkQaq4iL7zwwsS8qcjtCPV5552X1l9//bTyyitnlfiLX/xiVnC5mFDKfUYZ3nbbbbOii/BvvvnmmegPRKi5ZchXnRBdrhZcMriR/OlPf8pEXrs+8IEPpJtvvjm7kyDvFgunnXZaJug24fzsZz/L6jX3Dn7mXFsQbfU68MADs7r/9re/PSvuPuM6gjxz9bAQoeqrq3ZR4E888cR8/0svvdR0RAWhHjkPWhDqkdMXQ1mTsIsj3y4GoR7KJyAIda/QDULdKyRHQT4mDj7BjQkpRmIpp0gjFw5uIZRVpFL4N64hNu5Rc23sQ6alH/3oR9l/mW/yq6++mol4Se0INdVaXgir+15//fXsdoKMIvL8upF5GwvXXXfdvEnRd0h3NcoHhVrdGl0+uJpwvbApkc+1tqk3hVxe1HZEl8IsysnLL7+cr+UTfffdd2fCjSBTo6nPfLpffPHF7L5CaZcX/3J5wU77uZFwa9EWGxAReCSdK4iNnfJHyHffffeWoyUI9ch5kIJQj5y+GMqahF0c+XYxCPVQPgFBqHuFbhDqXiE5RvN517velVVnrhYzzzxzJq+IqugYInAUn2mEk5uFqBudJCTapkAuFfLgt/3MM89k1ZeKKwyffEXmUIc6iYLONYMyLY9qneTFfeWPf/zjxI2BFg7cP0TlKJsF1QUBLkqyxcRjjz02SfHve9/7stpd9RuHi3sbXTq4kyDtA4UnDEJdp4f7c00Q6v7gPBpLCbv4vzds/bKLQaj785T02gWibq3HSrlBKjC0YwAAIABJREFUqOv2eFw3ahAohJrf9mhLQahHTo8FoR45fRE16R6B0WwXg1B33/91chgrxLZOW13T6/YGoa6LfFw3ahCgbkuj6fTHAu5W39sq+2b3O/E5p9D3O43kcqd5apo0xYtT9BSSXhvwupUbrnLr1i+uG3oERrNdvPEPf0hLfeADQw9SQwnD9dxEuf3p6l7jHIS6P/0WpQQCtRCoQ6gne3WyNP2j09fKr+5FvTYsUW5zBMYbznXHQVwXCLRDIBTq/oyP8Wafet3eINT9Gae5FKHm+AxXkwgS9957bx9r0V1RNvAIF3f//fe3zKjaTv7QDmdpdZBJd7VpfbcQe3ycRduok1wvPJ6DaWyUrEYrqXN/r66p4/Ix1fNTpYWvXLhXReZ8em1Y6lYuyq2LVHfXDRfOdWoddrEOSr25ZrTaxRuvvz4ttcwyvQGhg1yG67mJcjvopC4u7TXOQai76IxOb73xxhvTPffcM0nINAeAIJzdpDPOOCOHkus2H3UQ1UNouRJnurFeTjYUAUT4t1ZJO236e/7553OkEBvyxJjuZOEgagbyblPiYJIwdXyoq0eht8pHxA6h7G699dbkQBih8xxo4zTGuslJiDYjOoGxmxSEuhv06t/ba0Nat+TxVm4dXMIu1hdUxqtdDEJd50nq/prxZp963d4g1N2Pwdo5mDgcLtJILIVVEz1DqDhxjUWXEHtZSDpkVIg3h4cIaSfsmxBxlG5xjv/+97/nPIW2W2eddSYe/Y1MCmEn6oT8nCqIXArzJqbyRRddlMPRKdOphOIlU2iFekMM/XYUt7jT00wzTT6hEMm2AJCfMHDCw4nXXE4BLEBop7jM5TATYfgcFHPKKafk/KaffvocsWPrrbfOsa2Fvlt00UVz+Lw99tgjH2cuLJ8DW9QZSRWuTii673//+3nxIESdWNiStgh3ByvHpYsU8uSTT+Z2uN9phOqj7dRnoeyqqZz0iFhLYmOLDGKB0uxesbWruAg7KMSf/I8++uj07LPP5rB6IpNonxCBDnhxjTZS98WrbpaCUNd+nLq6sNeGtG5lxlu5dXAJuxh2cSC7GIS6zpPU/TXjzT71ur1BqLsfg7VzMHH89a9/zcSwJIQNmUTEkEVxlhGwD37wg1nVdZAJIkutFf+YQuEEQYTXYSROG0R8xVp2f3GtEAMa+UY8xXsWFs7fYi/PMMMMmUxfffXV+XCTs846K8ddthkO2d1yyy1zPGUxq5FtZBWBdC0Si0w7CGbHHXfMCnI54rtKqBFXZB8xLqq3ts0000yZ0MsTFlwyxGVWLyRTCD1EW7xm9RdqzjXbb799zgvZdqqhz4488shM9n0GG/GwhcVDXLWVkdYOhP+oo45KU001VT490YEx+qIkZNsCRoi8O+64I1H8y8Ki2b3wqeICu/e///2ZwFP3EXH95BhziwFlCwv4gx/8IOcP51ZqfRDq2o9TVxf22pDWrcx4K7cOLmEXwy4OZBeDUNd5krq/ZrzZp163Nwh192Owdg4mDgStGhcZaUXkKKsiHiCK5SAR6ipijazZoU05veCCC9LSSy+dy1xiiSWycuy4bGQa0S4JoXa4CWXXUeKIJdJZjv3m4vDud787rbfeepnEOjGQGwcS7NhtBF5dHVrigBNuEGI7H3zwwZkcItQIsgGpjGrSTgo0Qsvtg6LsgBT5uY9aveyyy+YTF9UHoaeQcyXRBnGd5eHQlU033TTXnQuHpA3iSCP25TPknUrtWq4XV111VS5HG7XJ374v93NboSpX01JLLZUXGcsss0zGGuFHkJvdi+g34lJIttjXP/3pTyf2EWJP0VYeJV5/tnNjCUJd+3Hq6sJeG9K6lRlv5dbBJexi2MWB7GIQ6jpPUvfXjDf71Ov2BqHufgzWzqHVq00ZUKgRyKIUU64pr04FtEHO8d6INncFxkeywQRxReCaEep2x34Xtdv9CO5HP/rRTMwLoUZGr7/++qw+I9DTTTddJpgnnXTSRB9qBHaDDTZ4g59yo8uHunJRkZ8BbFOjtnJj8VvbqPI2EfqOIl8I9T777JPJfPHp5tZBledCUZRxrjBUYXm4jpsF/BBkn1s0UJxLQtypxxL3FUeNWxyUQ1yOP/74fGCBExsb70XMLSoacUHkqdYWB/pIuyT193bAPRZNpe98p45+qmnCvhMGHE+xKXFAiAa8oNeGdMAC//8F463cOriEXQy7OJBdTK+9FpsS6zxMXV4z3uxTr9sbhLrLAdjJ7SYOJLT6up9aSWnmvoEYr7322pl0cgtAdrknILoIJNcJajH3hLvuuitdfvnl2SWEi8Opp56aj7guqd2x31wcEDlqMCXaqV9IJTXXSX78opVF+RWFhIsG0soNg1922ZTYCaFWL/mpv7bxFacEczthTJFkdfEd9RpWK620Unrve9+blWyKNJcPSi8F3WdcP7ivwAfx5fIhlrL6/vjHP86LAIsSriqrr756VoYtWBD5avQPf9tECTOLF1iqGzeTZvdy62jERf9MPfXUub+00TVOUVSePHzWSKibjZ1QqDt5ogZ/ba8Nad2ajLdy6+ASdjHsYpVQNxszoVDXeZK6v2a82adetzcIdfdjsHYOSJiNa9WEFHO3QL6QsPPOOy+ruQgu1ZO/tQNKHPuNMPJBprzKx6Y/yiqitvjii2cyLuyb1I5QU3Ip1BdffHGOwiFxzaCwcsFAAhFAbhrF/eTFF1/Mrh02JVYJtQ2RjS4f2onol02Jpb0IsM16k002Wa4/t4m77747k3UbHCXklt8xH2QuIwg8gotMSzZqwsHigauKfLiLqAcXEP7Y8IMbxVnd+H/Dx+eOD+eyUQ3j5z514WONdCuXHzeS3exean4jLqKC2OBpccOVRhnqZkMp//DZZ589CHWTJ6XXBq3uwxjl1kVq6K8Luxh2MQj1pM9Z2KehtztK6DXOQaj702+DKgXZFbECSaa28i1GEm28E0GCgl0Swv3EE090XA63kAceeCATSS4kFGquHVwhigsE9wdEtFeJa4cFhPZIiC6/8Ntvv31imeqgncXfXLQSC4tyj/vUFzm3ubEk91GLualUE3xsuBS2sFXimqG8Rhxb3duIC1UbZrBUd77eSHknMbhDoe7VKGufT68Nad1aj7dy6+LSyXVhF8efXQyFupMnZPDXjjf71Ov2BqEe/NiLOwOBniMQhLrnkDbNsNeGtG6tx1u5dXGJ6wKBdggEoe7P+Bhv9qnX7Q1C3Z9xGqUEArUQiKPHa8HU9UW9NqR1KzTeyq2LS1wXCLQl1DfckJb6/9Gt+onUeHteo73dja4g1N3hF3cHAj1FoB2hnuq5qdLUz07d0/JKZmFIhwTWN2Q63nDuD6pRylhH4MZrr01LLbdc35s53p7XaG93QywIdQv8bNyzAa4XaZFFFkmLLbbYJFnZrGbz32hJfLhtELRpslUSpYNfscSPWIg5UTP6ner2nbqqc0n8uh0/zueZn+aKK66YI5N0kkQbsZlT/w4mtXP5mPn+mdMcd80xmGwHvCcM6YAQ9eSC8YZzO9DCLvZkSNXOZDTbxRuvvjottcIKtdvaqwvH2/Ma7e1u5IxJQu2kO4eJiEE82CSUk8M+epHEJV5hhRXyBrWSRNUQ/q2bVE4gLDGau8nLvSJrIJjNDh4RiUQEixJfuVlZooZINjnaHGjCFCVDX9RNIpu4V6SPwaa6fScqiqgjogyIcCJqiLY7odIhNiJ/iLzSLLUaY05DdN8zzzzzhtvqtC0I9WB7vbP7YuLoDK+huDrsYthF46qOXQxCPRRP4BvzDLvYHc4jklAjdiI3iEAhuoRQbhQ/4dN8JqyZgzKoh8jLOuuskyNFiKssVJkQaq+99lr+XPgzodOEUXP4CXLn9DpxjSUqqlPxECr5i8zgcBEHiCDUok8ccsghOWoDQix+czXSRB34TRzybSS+DAlSLJwcEuewkDXWWCOHZBPWTkJSxVUu9VOvZ599NseKdgAJ8unQEiHbSnJoiWgdsHLIiZB0/n7ooYeS8G7ycFy5I8TlJQY0PIWme/rpp/NBLQhtFTfh7cSO9r2IG7AQh7qa1PWKK67IR5ZLjgYXOQPWzTB3eAxCK5KJ/IXD86MPHXQjqsduu+2W+7ac2lhC74lyom1UF98Ppu8QaichVhcJjnA31sS71u/KazfGdt5557THHnvkMXn//ffnY+TVSUxqiwlHozvNUn4WJKVtwiI2S0Go6zxR3V8TE0f3GHabQ9jFsIt17WIQ6m6ftnr3h12sh1Orq0YkoaYw+kHMkGAkh+uAQzMQLK/VHcctvq84zmIxi/2LnIkRLK6xe7hUIMwO6EAWHfKBIIl1jOwhOz5DXp345weRRIgWXHDBTKiRRORRPnvvvXf+3+EjnSQTB/eBhx9+eOJtCCISeNlll+U8HTxC3eQu4Ghsx4VzPUAwHXAirrHFgfjOMBAz2UErjrx2NHc1hJ7TFS0KHNSCKCPSyKG8YLXjjjvmcHOIImItLB/SLj/lvPzyyxmjKm7+dvgMHJwg6Iht/aCckrTjqaeeykr3bLPNltvkM0S8GeZiSV955ZU5rjXyqV76wyKBMuwNg0WSBYOyLZRgJn+LBn111FFH5Rjdg+m7ZoTaQsV4EM8aAbaoaDfGnP7oSPE77rgjjwvKtSPi3W8RZLGmbcaiMVva1solJAh1J0/W4K+NiWPw2PXqzrCLYRfr2sUg1L166trnE3axO5xHLKEuZI0CSCVGQCiACIqENOp8p+QVAsjnGaFDxBBxfrDIpoM7JESJUkwxLZ9RuZFEhJfiSR3mqkDJ5v6AuCGDVGluD+qh3E5SOYqaelvSzTffnBV2RJDrx+OPP57rRW1F5NUVqfdjEYAYaz8SqV2UUHGV3Vv1Ay7YaAccHCuOLGsjIkoRUA9KtXbC1sEoTk8sx31TnhtxE58ZIV9uueUyFhYE6lJ1q0GeLVIsfii2/tcP8taGRsyVrX0wRkQtXJyAiIjuv//+6YILLsj1k5wmyR1DfuUzqjflXczswfRdM0Jt4QB7R4Yj1I5Fv+qqq1qOMX7XCLWDCdTPuESojUsLFgfkODqdC4nFj7a1ciPRziDUnTxZg782Jo7BY9erO8Muhl2saxeDUPfqqQtCXUWg1/PAiCXUCK7T+Sh9yApFFSlE6iR+r0ih9KUvfSkts8wyWf1D+BBgxIsiiMwUVwuuDtwbHPzBvUKiZl966aWZzLoPiXJgCLLLDcOreYSqJK4JCFQnqdWrTXkgdVwGuLj4m9Ju1e6UwVtuuSXXARmlXiOvFFnttKHOASmtCDX1ntsHdRdJhFch1BYrcEGKLTAsEqqEGplsxM0R38ou7hHyQnobCXXV5aNg5DrlNGJ+yimn5GPUub3MMsss+c0AhRrppEzDrZygpU8cQ+4tQamDenIpsbAYTN81I9SOLLcoszgrPuOwaDXGEOrqkeIItXGp/trrgBkbOvUZF58qoeY+5KeaJuw76Uma1e9iU2InT11MHEM5cfSiJ8Iuhl2saxfTyy/HpsRePHQD5NFrglm3ymOl3BFLqCmVSAn3B8rsk08+mQky9Y9bAuWabzXi7LU89whqrM+4MXANsDlOPsgylw/qLALqMyQVSaQkIpiItvyRIe4W3AsoxQgp4sY146STTspKMteSTpKJAylDjquJsipfbg9cVbihUJ+1ceWVV84uLlw0KLvUTZFCfFeUWaouFwWEtJqQunaEWv2RxXPPPTeXTd2lSlOouZcgro247bDDDoMm1BRlZLwRc28BtNHphdRsLieO6OZ/7HPqLsJ91113ZbcLfaRfKbyIrIWHNnAvGUzfVQk1wy5/eGsrFR5GA40xC59GQq2P+IBbxHg7YNGmbRZy2sZ1p1UKhbqTJ2vw144VA14XgeFqb7v6hV0Mu1jXLoZCXfdJ7+664bITY6XcEUuoERqqNNcBmwttCuQTjHD6nJJ4xBFHZCWZWmvTIbKLHCM4iy++eFZ8+foi0xLVkcLNd1dEBxvEuDsgzwgvcm6DnLK9iuLigbxRTiWkHeGuHnVdZ/jyrS2bDKvXWwBwu7A5z+Y49aWm8//WbpsJuYAgbfLgi2sRoB42IdqwiVQinhYBJbUj1PzSqf9IuI2b1Fj4FeKoXOHxENcqbvKsRvloplB7g4Cgl02J1bY2wxzZtBhyXLd2Wjgg3dqmLER53XXXzf0EA2TXYsCCymeUdQsHCvBg+q68HSj1VA/1hHM1qkm7MeYNCgW+KOnF5WOBBRbI7kOIujct2mnslLZZyDVLQajrPFHdXzNWDHhdJIarve3qF3YxNZ2Lwi6+cdQEoa77pHd33XDZibFS7ogl1F7rU/S8Mq8mr/ifeOKJxKe3JK4PPkMwS7JRzWeSqBQiRlSjc3AhQMwbyTESSTFFWksqIeBuu+22ST7vbui2vhu5RPi1XdlIfgnDhvxW2+l7Sjcy2El617velX2dbWaEFWItHwuXgm0z3Dopo/HaZpjPMcccuUyYl/T2t789t4dPsu9E9KhuuvQZFxI+19V+Gsq+G2iMtcKlsb+qbQtCnfJ+hHPOOaebYTWoe6PcQcE2rDeFXRy/djEIdX8evbCL3eE8Ygn1sssu2zQecnfNjbsDgZGNQCjU/emfmDj6g3OUEgj0AoEg1L1AceA8wi4OjFG7K0YkoaZk2lAXKRAYbwjE0eP96fGYOPqDc5QSCPQCgTh6vBcoDpxH2MWBMRp1hLq7JsXdgcDoRaAdoR7KVi255JLppptuykVM+/i0afJ/TT6UxU3MOwx4X2AeNtea/rQuShnrCNy41VZp90MOSb+efvq+NjXsU3/gHis4j0iFuj9dOLJKcWqhjWuNyUEv/L9HSxJX20bJVgeXNLaTn7joIv1O4lrblMkvvm5ygiRfdtE7hiq1c/kYqjIb853/uvnTNE9P05fixoohrQvWeGtvXVxaXRd2sVsEO7t/pNrFG2+6KT005ZRp3UUW6axBXV493p7XaG93AyYIdXf49exuh7M4zfDWW2+dJE9xl7t1fxGRQ/42+XWbxFK2EdKhLs1SNcJIs+9LOwsptfFUfqKgVDcYDlTPbtskjJ9NqsIO1kliXQvxZLNkY5jCOvfXvSYIdV2kursuJo7u8OvX3WEXwy4aa0Go+/PEhV3sDucg1N3h17O7TRxC/JXwayVjET7EMRYDWzg9oekowA5QEW+bgvPss8/mGMqOEEd0Kd3CCAolJySf6BdPP/10orCWtM022+TweCJ5iO7hcBvfC0dXTlYUf7uE+xMn2uEpwg66Rpxlyrlj0O2+d0iMkwERakedzznnnDlaCMVaBJGStFP8WQfxSA46ca/44CJgCIsnqofY2MIVIr0O0nEypJB+DsFx+EtpkxB6Qvw5tAcOCLLDd3zmFEX3OGVS6LsSdxpxhw8XBwsEOIo57jN9AIvGJF/qTTnkxUmXDmURL1zEFAT9kUceyYsiUUhEIPG9soRrVD+q/eGHH577s1UKQt2zR6ptRjFx9AfnbksJuxh2MQh1t09R/fvDLtbHqtmVQai7w69ndxclBpEsSdg/cYvFM3Y4jUNg/l97ZwJt5fT+8V0hGQtJIplJRYZMZZZCNMmcMVOhiGUKITKGJioylETJmMwZQqZkTAi1TKlMTWT4r8/Tf9/f6Tjnnn3uee97zrnnu9dq3e65797P3t+9332++/s+7/OQeAUiTKxtwv5BYiGEkGniIUN6yYwI4YR4Ug9lFeLJNb706dPHPiNJDDGoCUfI/4nrTKxr4lqTVIefkEKINOQcwoiiTHxusiSSfIfkN0OHDrV43pD+2bNnWyxqyOPYsWPLslti24+TmOL0n5jYkE2iuhDDGuWZVOQcDCCpkFxiPdMvEq4QTpDEKH5M1Of3k046ych9rVq1DAMSw/A7cclJ+oJd+vnqq6+6KVOmGKbY41pwg1gTB7tVq1Z2qElWy+kTyW5oj7jSHEhonw2IAw7jJ4Y1RJpDD08WOIQQ4pCEPvSPGOP0I1VMcj8vItSR3VIi1AkI5OuLMtfZ1L6ofVGEOte7KLx+vvaJqmJXhDp8rVXqlf6L46mnniqzg7I7aNAgU4BRpMmyh1IKuUO5JlEIiWHIREiCGohiu3btjBhSIG6QcFRj4nonxuGGUJOmHTUb0kw9rodck4ac/6Pa8nObbbaxfyRdwT6qMiT00UcfNdsU1FsUWny+aRuVF5WYZDsQT18YJ2Tbq9aQfDJEoi5PmDChrD2fDRJiCilnfGADgWcc+F0zJog5mTQh4CT9QclGGeazvn37mlnGT2ZNCDHKOGSZ2Mdk0kRN5jCBSk4BO7JGkp3RFzJUovZjC3LOC3zYJhkP7h8cWlDXmRcIOv0jMQyKNOnHUf4h6dTlupYtW1rT2aYer9QFmNC4fKgrD+mq8sVReQit2LL2Re2LtiKqVZMPdQw3XantT1GPV4Q6hkUaYiLdo01fl9TbEEbILxkWSduN+wFEkCQnkDd8rfkMJZSC8gtpTEeocYnArQK1FeKMYu0JNS4kKK9kKJw2bZqprImEGmKL64Z3USHUIclXIP4+7TmuGriaJPopJ7t8+PGhNNMeRJWCuwlEF5cRCCj9JLnK119/bePzhBpSj1KOmo1yTiIV1GJUYlR1Cv7akGnUft9fyD5PAPDhBlfG6QsZOBP91mkHUg3pp9SrV8/cYho3bmwkGWWeJwL333+/e/nll8vawfUEFZ4nBcwBiXQYiyfUqdaFFOqQuyX3a6LeSEN7VGp2Q3FJd532Re2LrA35UOd6J4XVL7X9KerxilCHrbNKv8p/cUAsEwtqp09L/sEHHzhSWuPygRpNeu5x48aZ6wKuIqTMhjiimELwbrzxRiNvkDlcRSB0vqAil0eox48f77p06WI+wrhPkPYb9RYXE5RyPiPleY8ePUzNhfDj1gDRrwihRoWnPVxJZsyYYWSesZF+HRIMwQcj0hXjHgKhZkwoz5Bd+upVbdwuILp8xiGBa8CB9nEbgTyjJHMYQfmmv4wNFR4VGtcWn3mSfuEqgm83CjQFf3YwIEKIJ9R8znUcBHgqwDWo8xB2DhrgRbpxfheh/t8Kj3pDC71RZTcUqfxep31R+6IIdXz3oPbF3LAWoc4Nv8hqe1eI5AYhxZBYVFNeZoOs4fqAqoq7AaHfcJ9AmUVdhvhBpim8DIe6il8yL+dBxEMINWowbfEiH/UgtRBLiChEHqUXMl+/fn3XqVMne0mRv0G6E6N8oFDTt2SXD0+Kk8dK3yHAtIfiDtHFLuOmDxSIMO4dfkyQeA4S+H3jykHffvrpJyPGYIRCPGbMGMMPwo1rC23hmw0BRhXH95mXO3k5E99sfLZ9YXzgn/iy6MEHH2zuJKjjuNygolM4DIAdhb7gkoPrB/7Z2OMlTtK8Myco5KmKFOrIbqlyG9IXRzw452pF+6KzPb3U90Up1LneSWH1tS+G4ZTuKhHq3PDLa22iS6A6o4BC1CCvEDkid0DevM80rhD4ChN1I5sCmeUlQGJF0wZ+28Rhxg8YBZcwfLRLVA76EEWhPVxYUJn9i4H0A19tFHqvHCePiQMFLwAmFlxGcOtIHDfYUDc5FCHuMUuWLMk5RCFtQ7JRr33/IfZgh7947dq1bV78OJIxE6GOYhVlbkNfHJkxKtYrtC/+b6+vKvuiCHU8d6P2xdxwFqHODT/VFgKRIiBCHSmcaRvTF0c8OMuKEIgCARHqKFDM3Ib2xcwYlXeFCHVu+Km2EIgUAaUejxROEer/RyBfX5TxzKasVHUElHo8nhnO1z5RVeyKUMezTmVFCAQhwMuWhCuMu1SVDS0UN403FCldJwTyjwCEut+118beEUKkkgAs7iK7lYv4E+uu6/6qVs3ySBBCN6oiQh0VkkXYDr7JhKAj2cmcOXNsBET+wM/4ww8/tKgavJiYTeEFPSJ08FJhMRf8nom4ku34cx2zCHWuCIbVj3ojDbPqIt/AC91uaP8K6Trti+lnI2/7Yh5IbSGtSfUlWgRaNW/ullSvHvl+LEId7TwVVWtEsCCihU+jTeeJyEEoOKJfkI0xORV6qgESxYKX8Yi+kRjlo6jASOos8byJ7kHkjjiLCHU8aItQx4NzMVrRvph+1vK2L4pQF+OtVLB9FqEu2Kkp3o4R55lYybgY+HTbmQg1oeeIc00heYpPoEKou9tvv90Sz3zzzTduk002segaKNZEHyErIBkYuQ7VF8JOuDsihBAVgxCAPioJKjl/W3PNNS37Iir6RhttZJ/VqVPHIoqQbpw04yR0QQ1HVUdpJyY1kTToFzGhR48ebTb5IqB9wuq9//779jefApwU4SRqSezPGWecYdkgSWYDse7YsaOF8COhDbG++ZxwVvSfGOCEy+vWrZulLifJTc2aNR2xvMkKmap+ulUjQh3P/SRCHQ/OxWhF+2Jrm7aC2hdFqIvxVirYPotQF+zUFGfHIKakByeJCeQTYsoXSXmEmljNxIEmVThklwyAxGSGeKJQo+YSH3r27NlGJG+66SZLG06SE5KoYAMSDFkl0QnqOCGuILKJcZlRxklvTqxn/ka/8CmrW7euEV8+x0WFOmRFfOyxx4xUE/eZsZBNEcJNmnGyKFKweemllxoBRlGnDuSf0HbEhSZJDHG7fX9IVkOsbVKzk7SGGNsNGzY0Ek02R8bJP5K9oMwzNuJ/kyCGcZHWnZTj6eqLUC9HQMQ2nv0jXzjHM7rorGhfLNB9UYQ6ukWulpwItRZBpAhAnFGayWzYoUMHi8HMl255hBqVGWUW0khBaYbAEu8ZAgyhxuWDLIyQdQgvCjifke2RaykowxBaMh4Sb/qyyy5bYWyQb1RdMhuiLKP2Qsppg7Tr1EMlPvfcc92QIUOMvKJgox5DdukLRBflHUKNLUgzhHfkyJF2DX+jH2RH5B/j4hrfHxKyQKjJakg6cfDhMAHppj8TJkyw2NgUn6GReN2kRwdHFHOu55rk+ij2FPDkX3LRS4mzejsBAAAVYElEQVSRLvWUjeWLYJaa3cqfyWgtaF8s0H2xWrVoJ1qtlTQCItQlPf3RDx7CSWIWMvjhElGvXj3LpIhynM6HGiWapCvHHnusdYgMhhBnSG8ioU5OPQ6p7ty5s2Um9IVsj927dzeyiauIL7hVTJ061WxAUHHfgMA++eSTRlRRnyHEqN8Qakgvv5PSG0INgSXxTCKhhtiTDZFxQczxkeTFSbI7Tps2zR1//PFlhNr3xxNq2mZ8KNwo3ijxqNT9+vUzuxTcRzhUQMZ9Onde3kHZhhwn1+cA4JO+JM+sXD6iX+upWiw1Ypuv8cYzm9FZ0b5YoPuiFOroFrlakkKtNRAdAvgZjxgxwpRd77eM+wLhY8hUmI5QQ1ZxbUDNJdX3qFGjjCj36tXLrb766uZmkZx6fP78+W7gwIHmttG2bVsj8ZBZCDE+x8mEmlGifL/99tvm3gFpJx04riko1qQtHzRokJFm2g0h1F999ZX1Db9tDgTY7NKli9t3333tAIE7B6o77SUTalKwgxWuG5BgFHIIOEo1ivyMGTNsPIyPQ0kyocZOcn3a/Oyzz1JOqAh1dOu8vJbyRTBLzW48sxmNFe2LBbwvilBHs8jViiEghVoLITIEIKT4I+Nz7AsuFI0bN3YvvfSSvdjXv3//lFE+UHpJR46q/cUXX7ijjz7aCCYv+0Fu+ZmoUOPewGNUfKGbN29uL/aRIvzwww83Aj958mQjsokF32PcQFCDFy1aZGQW9wyU4Ro1aphtXEJwwwgh1JDvf//91+r27NnT+g1pX7ZsmX1On1CrwcT3J1Gh5lpS+KLme7WdvjAG+oL/NmNGcU+lUKeqn24yRagjW+blNlRqxDZf441nNqOxon2xgPdFEepoFrlaEaHWGigsBIhiATn1savpHaR18eLFpkCnK+uvv75F6UinzibXg8RCvhMLPsqowl5Zz4QMPtTDhg1z06dPdyjVvn+QaA4GuJVA3NdYYw23YMGCtM1hd+7cuSv0BzUfxRtXj3QuHL7BVPVTGROhzjSj0fw9XwSz1OxGM1vF0Yr2xeXzVCn7ogh1cdwERdJLKdRFMlHqZmEh4Ak1LhnFUESo45mlUiO2+RpvPLMpK9kiUHT7ogh1tlOs68tBQIRay0MIVAABom389ttvwYp2BUxEWkWEOlI40zaWL4JZanbjmU1ZyRaBotsXlXo82ymu0PWlkvJcqccrtDxUSQgUFwK89DllypTi6rR6WxQI8M6CihAoRgS0LxbjrBVHn6PcF5V6vDjmXL0sEQRIaoNKoCIEokTg3XfftUg3KkKgGBHQvliMs1b4fY56XxShLvw5Vw9LCIF8uXzIbjyLrNRwjgdVWanqCJTafaPxxrOio8ZZhDqeeZMVIRCEQNQ3eJBR55zshiKV23WlhnNuaKm2EFiOQKndNxpvPCs/apxFqOOZN1kRAkEIRH2DBxnVF1YoTDlfV2rzmzNgakAIaH+KbQ2U2v4U9XhFqGNbqjIkBDIjEPUNntmiFKBQjKK4rtTmNwrM1IYQKLX7RuONZ81HjbMIdTzzJitCIAiB0047zRLRxF1kNx7ESw3neFCVlaqOQKndNxpvPCs6apxFqOOZN1kRAkJACAgBISAEhIAQqKIIiFBX0YnVsISAEBACQkAICAEhIATiQUCEOh6cZUUIZIVA9erV3dprr+1+/vnntPXIdjZv3rys2uXikLY33HBDN3fuXPfXX39l3X66CiF2qdugQQP3yy+/uEWLFkViO8RunTp1XI0aNSqEZy7jZQ4XLFjg/vnnn6zHus466xhO6eoypl9//bVCbWfdGVUQAjEgEHIva18Mm4gQLLUvhmHprxKhzg4vXS0EKh2Byy+/3LVp08YtXrzYyO/JJ5/svv766zK7/I1rFi5c6NZYYw13zTXXuIkTJwb1K1PbTZo0cUOGDHFLly41IjZr1ix31llnBbVd3kWZ7Pq6zZo1c3fddZe77rrr3COPPFLpdsFvzJgxrmbNmmbrm2++cd26dat0uxxYGCc4r7766u6VV16xeQwpm222mWvZsqU7++yz3ZFHHmlzlFgaNmzo7rnnHpu/1VZbzdZGaNsh9nWNEMgHApn2EO2L4bOSCUvti+FYJl4pQl0x3FRLCFQKAqussop7/fXX3cEHH2wKMWRv/vz5rkePHmX2XnzxRXffffcZaTrnnHNc+/bt3X777ZexPyFtQ6ZXXnllI5VsqpMnT3bHHHOMmzlzZsb2010QYpe6kL8JEya4Nddc09100005E+oQu3369HFNmzZ1Xbp0scNL9+7d3dChQ3NS5kPsXnbZZW6PPfawee7QoYO78MIL3e677x6EMXO+6667uq233tr6nUyoBw4caCSdg9jmm2/uxo4d6/bff39Tq1WEQDEiEHJPaV8Mm9kQLLUvhmGZfJUIdcVwUy0hUCkI7LDDDu6OO+5wu+22m7V/ww03uC222MJ17NixzN7UqVONLH388ceudevWpj62aNEiY39C2l5rrbVM2UT9Rpnu2rWrEb+KuCT4DoXY5drRo0e7l19+2VTXwYMH50yoQ+yOGDHCNWrUyDHuP//8040cOdKU41xKiN0tt9zSxsuhqW7dum7KlCnuvPPOy8osIZ9SEWqU/TfeeMPdeOONdkh466237IA0bdq0rNrXxUKgUBAIuae0L4bNVgiW2hfDsBShrhhOqiUEYkEApblfv35lauUll1zidt555xUI9dtvv22q8eeff+6aN29uBBzFMlMJaZs2UIoHDBjgdtxxR/v5wAMPZGq63L+H2EV1PeCAA9xhhx3mXnjhhUgIdYhdFPH69eu7Xr16uT333NMdccQRhn0uB4gQuyeeeKIdWD788EO36aabmiLO4Sibko5Q4+LxzDPPuNtuu82a84R6+vTp2TSva4VAwSAQck9pXwybrhAstS+GYSlCXTGcVEsIxIIASimPLlGcIXU333yz2T3//PPL7KPi4mM8adIkcxno2bNnEBkLaRs3DxTOv//+29xJIO25lhC7+BDzKHLZsmWuVq1a9vPqq68O9g1P1ccQu6jEKNMnnXRSGfnE7YMv54qWELt8Yc2ZM8cw5iUq5jKV2lxeH9IRag5YPGHo3bu3W2mllUyt5sCVyyGholionhCIAoGQe0r7YhjSIVhqXwzDUoS6YjiplhCIDQF8qCHSr776qhs3bpy5IDz22GPmeoH6OGrUKIvuAJHGl3rJkiXulFNOCepfpravuuoqczfJVi3NZDyT3e222878fim4Kjz55JOW4CZXv99MdnkCcNBBB7m2bduaT/Kdd95pOOca3SSTXZR/opkcddRR5sM9fPhwe9EQch9akgl1u3bt7OVGnl6gtKNEsUZwF9prr71Cm9V1QqAgEch0T2lfDJ+2TFhqXwzHMvFK+VBXDDfVEgKVhsCZZ55pPtKUH3/80dwg9tlnH3f99de7XXbZxVwxUCEpKMmdO3d23377bVB/MrWNcrrxxhuv0BauCbgN5FIy2U1s+9lnn7XxRRHlI5NdSPy9995rY0bBHT9+vL0QmWvJZJdIHdhFlafgosGb99kUCDVz7yPAoKpDoHEj4QDG2KpVq+Y4JD3xxBPZNK1rhUDBIZDpntK+GD5lmbDUvhiOpQh1xbBSLSEQGwK1a9e2l9XSuVxAxLbaaiv3ySefZP0oP1PblTXIQrZbGXG3Q8bLHM6ePdvC50VdUP2//PLLSmk76r6qPSEQgkCme0r7YgiKy6/JhCXXaF8Mx5MrpVBnh5euFgJCQAgIASEgBISAEBACKyAgQq0FIQSEgBAQAkJACAgBISAEckBAhDoH8FRVCAgBISAEhIAQEAJCQAiIUGsNCAEhIASEgBAQAkJACAiBHBAQoc4BPFUVAkJACAgBISAEhIAQEAIi1FoDQqAEESDhB0lcFi9enFXs44pCxdv3hGL6+eefK9qE6jlniVrinDeBLgRKCYG47y/ti9GsrrjnLV2vRaijmU+1IgT+gwBxgYkD7MuCBQscGaiIP5yPQnZFEsIQz/q0006zf/Rl4MCBldqda665xrVp08b9/vvvbt99961UW6GNb7DBBpY8ZubMmZYIpVhKnPNWLJion8WFgPbF5fOlfTG6dVso+6IIdXRzqpaEwAoI+PTVxIquU6eOxfSkkC587ty5saM1depUt2jRIsugt+eee7pTTz3VCPXkyZMrtS+kBF5ttdXc7bff7u6///5KtRXauCfUxPk++uijQ6vl/bo45y3vg1UHqiQC2heXT6v2xeiWd6HsiyLU0c2pWhIC/yHUy5Yts1TWFJ+FkLSuvXr1MnLLIz/cLkhBffXVV7sDDzzQHuvz2bXXXusmTZrkyBxIevHq1au79dZbz0GM+/bta+4TnTp1sux4tWrVctgiQ17//v3d8ccfbwr09OnTLbPiV199Zam1//33Xzdt2jT3/PPPu7PPPtsNHTrUPfDAA2ltkwacVOQouSQK+e2338z2lClTVhhr48aN3a233moHB2x8/PHHjmxcfLbzzjvbtSQZOfLII8vqJfexR48ebosttnAnnHCCJbX56aefrA0Sn3Tp0sXx91VXXdWyR9Iv0myns7v++uvb0wD6TVZCcCSVe79+/cz1hDYbNWpkiXOSCXW2thgThxMwpv+kLQdT2qW/L774orv44ostPfyJJ55ofdp2220trTr4P/744yuMb+HChZYp8qGHHrJDyA477GDZD5s1a2ZPE/y8Pfjgg27IkCFu++23tychs2bNcscdd5wl+km3Lhgb9bl2yy23LOsr9jjogTHYM8+kYacPKkIgSgQg1NoXtS9WxX1RhDrKnUJtCYEEBPjiIDX4lVdeaeo0BBeyDKmB7K288spGPtlYIMKQJ4gzZHSnnXYysrP77rsbga5Ro4aRbApqL9fjwvH666/bZ++++64RKwg6RPWggw4yckWhHmm8jz32WGtz8ODB1oZ3+aBuOtujRo1y22yzjflZQ0IhjBDHI444YoW5JrV1/fr13UcffWQ/1113XSOVP/zwgxF+xnnXXXe5YcOGldXj88Q+nnfeeUYk6S/ttGjRwkgnh4w33njDMICMQgT5/IADDrCU2qns4s6BfQpjZnwQdfrFOFCowSqZUHNoydYWdTgQQRI4BPgnEaQEb9iwoR2ESB/ftWtXSxVOIVV8gwYNjPxy4Hrttdfscw46jAu8UF0YA5hTvvvuO3NT8fP2xx9/2P9ZZ9hgzUCyOcSkWxeksIfY0/6nn35qxJ412rJlSztwUMaNG2eEHJK+9957x+Jjr42jdBDQvqh9saruiyLUpbOPaaQxI5DsKwiJgaydc8459hNyfcghh5j7B0rlLrvs4iCVKK+DBg0yZRg1G+Ua0gMZgjhBvvCFHjFihOvdu7d7+umnXZ8+fYwEoYRC0CFfkFWIFfYoiS4fiT5nqLzpbEMCIdQnn3yy++CDD6wN+uJVd9pdZ511TEVHOWY8a621lvVh/vz5RuzpA3VatWq1wgx4Qu376H+nne+//941bdrUSPQNN9zgLrzwQlNyr7rqKlNSUZc5JEAwU9lFhYWM4rd+6KGHmir93HPPuTlz5rgOHTrYoePcc8/9D6GmbWxkY4t2IdQcFpiTt956ywj//vvv7+655x7XpEkT161bN8MCQn3fffcZwb/lllvcXnvtZfPLOFu3bm3pgD2BZr5pj99R1nnCkThvgMkhAVIOXvSD69u3b592XXAYglA//PDD5ksPeQbLww8/3P7PIY8nJzNmzLCDF3OuIgSiRED7ovbFqrovilBHuVOoLSGQgIBXYq644gojKW+++aappRQINf/3JBPfYtRCyA5uGpAdCBlk8vzzzy8jsRBqSC0v+PE4HlcDiBBuHhAy3EQgdCi5EGpcAu6+++5yCTVKcDrbKKsQau/3Tb85GCQSaggZfUD57dixoym/uITgusBLiJkIte+jP0RADhN9zFHseXEQjG677TZzPalXr55DAQaDVHZPP/10I9QQQg4DkFo2cf8SIm4wEOBkhfqss86y67Ox9dRTTxmhxh0Dn/R33nnHzZs3z17EBHtcNZgnfodQ8xljZq6YM8aNXcp7771nbiw8QUgk1NSlzURCjUsGByqu41BDQWX+7LPP0q4LXEdYYx5zFG0IO4Qadx0OdKwF//SEpwMc3lSEQFQIaF/UvlhV90UR6qh2CbUjBJIQSPYVTPwzxBQXARRKSrt27RzEGzL5zDPPmHsGqqV/FM//UYFxb0C5RY3E5eOFF15wPPqHPEKKcGlA6cVvGULtSZ5XqFGKIW+QaO86ADFNZ5uNLxOhpm1cFdZee203fvx4t9VWWxmJnDhxorv88sszEmrfR9RWlGUwgNBCQlFfGQc+0LidME7UZa+Sp7OLqwrXcjiBQHqyWbNmTVN6wRVSnkyo8VHP1hb+xtkQau+Cg+81Tyk4MKFW42MOSeYwhbsFL49C+iG8EFt85hMJNcSXeRw5cqQ9kbj00ksNO9ZOunXB2klFqMF6+PDhRtpZVxdccIGtNVT8ZH953ehCIBcEtC9qX0xFqKvCvihCncvOoLpCoBwEsvniQHkeO3as23TTTctahBTiD4si7cPvcR3EEpLJi2W4ekCkfSGiCG4a3n0ikVDjaoD6CWnCXQJyhmqLWpnOtvehLk+hxjZ/x1ec/lE8yURpRqFGjfeHB9/XVH3EFQJXF9pBCR8wYIARY/oIeaTwOYo85D2dXVwnkgk1yrNXgukfrimpwuZlawuCzwuIHmvmHXcXVGX8xvFth8S2bdvWFOqlS5fay4oUDkm49fATlZmx4UePQs3n9DkVoWbeOFTRvm+LdiHVRA9Ity54yTORUI8ZM8Z80llDzAc+0x57ngzwEqWKEIgSAe2LR9kTOO2LVW9fFKGOcqdQW0IgRwSIToELBS/l+ZcQIdS4T+CDu/nmm5uqinLrC4k+cGn44osvjCyXV7gWVTTVY/xUtrMZDu2igOI/DNmvaKEdxsPhgMODL/Rvk002MdU58fNs7KKi8zIgbZdXorCV3P5FF11khBqfadxCmN/ERDco+zwtSBxbCIY8DWA+cX1JLNmsC18PMu2x9+5JIX3QNUKgMhHQvrg8qZP2xfBVlo99UYQ6fH50pRDICwKeUONTrVK8CCQSaqK0qAgBIVBxBLQvVhy7QqpZlfZFEepCWlnqixBIgQBh9nBRwLdXpXgRIA447iz4yBNOUEUICIGKI6B9seLYFVLNqrQvilAX0spSX4SAEBACQkAICAEhIASKDgER6qKbMnVYCAgBISAEhIAQEAJCoJAQEKEupNlQX4SAEBACQkAICAEhIASKDoH/A8ZGk9L6jwoUAAAAAElFTkSuQmCC", - "text/plain": [ - "\n", - "\n", - "If you see this message, it means the renderer has not been properly enabled\n", - "for the frontend that you are using. For more information, see\n", - "https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting\n" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.m_u_parameters_chart()" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "4fae1eea-8adb-43c7-851c-7130d548037e", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.vegalite.v5+json": { - "$schema": "https://vega.github.io/schema/vega-lite/v5.9.3.json", - "config": { - "view": { - "continuousHeight": 300, - "continuousWidth": 400 - } - }, - "data": { - "name": "data-5d958450f019fd7fb0586192a9efb3df" - }, - "datasets": { - "data-5d958450f019fd7fb0586192a9efb3df": [ - { - "cum_prop": 3.503608604660258e-05, - "match_probability": 0.00159, - "match_weight": -9.3, - "prop": 3.503608604660258e-05 - }, - { - "cum_prop": 6.30649556114804e-05, - "match_probability": 0.98268, - "match_weight": 5.83, - "prop": 2.8028869564877823e-05 - }, - { - "cum_prop": 8.759021693549585e-05, - "match_probability": 0.98481, - "match_weight": 6.02, - "prop": 2.4525261324015446e-05 - }, - { - "cum_prop": 0.00019269847507530358, - "match_probability": 0.98695, - "match_weight": 6.24, - "prop": 0.00010510825813980773 - }, - { - "cum_prop": 0.0005430593500932446, - "match_probability": 0.9891, - "match_weight": 6.5, - "prop": 0.000350360875017941 - }, - { - "cum_prop": 0.002014574978602468, - "match_probability": 0.99126, - "match_weight": 6.83, - "prop": 0.0014715156285092235 - }, - { - "cum_prop": 0.007291009567779838, - "match_probability": 0.99343, - "match_weight": 7.24, - "prop": 0.00527643458917737 - }, - { - "cum_prop": 0.038970639847320854, - "match_probability": 0.99561, - "match_weight": 7.83, - "prop": 0.031679630279541016 - }, - { - "cum_prop": 0.9999999906049197, - "match_probability": 0.9978, - "match_weight": 8.83, - "prop": 0.9610293507575989 - } - ] - }, - "height": 400, - "layer": [ - { - "encoding": { - "x": { - "axis": { - "format": "+", - "title": "Threshold match weight" - }, - "field": "match_weight", - "type": "quantitative" - }, - "y": { - "axis": { - "format": "%", - "title": "Percentage of unlinkable records" - }, - "field": "cum_prop", - "type": "quantitative" - } - }, - "mark": { - "type": "line" - } - }, - { - "encoding": { - "opacity": { - "condition": { - "empty": false, - "param": "x_match_weight_y_cum_prop_coords_of_mouse", - "value": 1 - }, - "value": 0 - }, - "tooltip": [ - { - "field": "match_weight", - "format": "+.5", - "title": "Match weight", - "type": "quantitative" - }, - { - "field": "match_probability", - "format": ".5", - "title": "Match probability", - "type": "quantitative" - }, - { - "field": "cum_prop", - "format": ".3%", - "title": "Proportion of unlinkable records", - "type": "quantitative" - } - ], - "x": { - "axis": { - "title": "Threshold match weight" - }, - "field": "match_weight", - "type": "quantitative" - }, - "y": { - "axis": { - "format": "%", - "title": "Percentage of unlinkable records" - }, - "field": "cum_prop", - "type": "quantitative" - } - }, - "mark": { - "type": "point" - }, - "name": "mouse_coords" - }, - { - "encoding": { - "x": { - "field": "match_weight", - "type": "quantitative" - } - }, - "mark": { - "color": "gray", - "type": "rule" - }, - "transform": [ - { - "filter": { - "empty": false, - "param": "x_match_weight_y_cum_prop_coords_of_mouse" - } - } - ] - }, - { - "encoding": { - "y": { - "field": "cum_prop", - "type": "quantitative" - } - }, - "mark": { - "color": "gray", - "type": "rule" - }, - "transform": [ - { - "filter": { - "empty": false, - "param": "x_match_weight_y_cum_prop_coords_of_mouse" - } - } - ] - } - ], - "params": [ - { - "name": "x_match_weight_y_cum_prop_coords_of_mouse", - "select": { - "fields": [ - "match_weight", - "cum_prop" - ], - "nearest": true, - "on": "mouseover", - "type": "point" - }, - "views": [ - "mouse_coords" - ] - } - ], - "title": { - "subtitle": "Records with insufficient information to exceed a given match threshold", - "text": "Unlinkable records" - }, - "width": 400 - }, - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcsAAAHeCAYAAAD0GSBvAAAAAXNSR0IArs4c6QAAIABJREFUeF7snQl0VdX1xr/MIWQmAyAighOCCIKiIqOiaNUWap1FRUFQtPwrpWKxA0VbioIgBRQHpFqlCBUQZJ5ERkFFQUBRRCEhhEyEzCT/9R176eORl7x3932PkLfPWllicve55/zuvue7+9xz9wnp2LFjFbQoASWgBJSAElACHgmEqFiqdygBJaAElIASqJmAiqV6iBJQAkpACSiBWgioWKqLKAEloASUgBJQsVQfUAJKQAkoASUgI6CRpYyfWtczAqGhoaisrDypV/wdi/vvPXXdtY6EhARERUUhLy8PZWVlp42Wr304bQ3VEyuBOkpAxbKOXhhtVuAJ/POf/0Tr1q3x2Wef4eGHHzYNSExMxPLly82/f/7zn+PAgQM1Nuwvf/kLbrzxRmzatAmPPfYYPvzwQ6SmpuKFF17AO++8U6PtLbfcgj/+8Y/44Ycf0LdvX58ArF27FjExMRg0aBC2bdt2iu3ChQuRnp6Ov/3tb3jvvfd8qruuHfyPf/wDnTt3NtflqaeeqmvN0/bUUwIqlvX0wmq3fCdgieXnn3+Ohx566BSxpIBRyGoqY8aMQZ8+fbB582Y8+uijeO6553DhhRcasVy/fn2Ntqz/97//PTIyMkDh9KVYYjl48GB88sknHsXy73//O/7973/7UnWdO3bKlCm44oorsHLlSowYMaLOtU8bVD8JqFjWz+uqvbJBwBux7NevH37xi1+AgnrBBRcgOTnZRJuMKBmRuovla6+9hvPOOw/PPvusmYodO3Ysvv/+e0RHR6NFixbmd2+++aaJOt3FklFm9+7dUVhYiLvuugtdu3bF8OHDER8fj9LSUuzfvx/PPPMMvv32W1hiuWXLFrRt2xbh4eH46quvMHToUBQVFcGKLC2xvPvuuzFw4EA0bNgQJSUl+OCDD8C/uZff/e53uOmmm7BmzRp06tQJBQUFuPPOO1GTPdtM0WdUzrZv374do0aNMv9u2rQpJk6ciObNm4NTw2wbI0VLwHmeo0eP4osvvjB9f+KJJ8wxzz//PBo1amTqOHbsmKnHEssePXrgkUceQcuWLc1UN3kwquexWpSAUwRULJ0iqfWc8QS8EcvHH38cvXr1Mn2tqKhAWFgYQkJC8PXXXxtBcxdL12nYI0eOmEiTpaqqCsePHzeiVl5ejquuuuoksVyxYgXuvfdec9ywYcPMtC4jUwpMdna2EbkGDRqcmLK1xJJ1FxcXm7+xLF26FE8//fRJYpmTk2OmY1n476SkJNMHCjYjYNcyadIkXH311Sd+RYFmZOfJfs6cOZg9e7apj6LGqWH++8svv8SAAQOwatUq03a+/6VI8+8sjBApfu5RMXmzTZGRkYYF7cichcdThCmwERER5uHgnHPOMXXyPL/97W/PeJ/UDtQdAiqWdedaaEtOMwFfxJIiwwiTURzFg4M4pwa9EUsee9tttyEzMxPr1q0zAsgp27POOstEZBRRSxAsETn77LPN4E+hXLZsmXmneumll5qoq1u3biciy3nz5pkot3///iYqYyRIcXeNLBkdM9qlgL344oumLZw6tuqqTiwZsY0bNw7/+c9/8O6773q0//jjj9G7d+8TU8kU2j/96U/ggwL5jh492ohez549TeQ3c+ZMXHzxxSba/uUvf3lCLCmaf/jDH0w0PXLkSMP31ltvNeJuMaNYvvHGG6Zelo8++shE/Hw3S7aM2LUoAacIqFg6RVLrOeMJvP7662jXrp2JUO677z7THwoYBYiF05GcBqX4MMqjGKWlpWHRokVGAC6//HKvxDI/Px/XXnvtiQGeUeBvfvMbM81IsXQtjAoZHVJQKSwXXXTRSX93F0tOu27cuPGkhUkULIocRYRTrZyy5Cpd92IJfnViSfHi+1AWRr2e7Pfs2WPaWF1kR3acwqVw3nDDDaYuLkjiD6NQTrtakSWnVimmXMBDMXd9j/v222+b98DWNKz1/1a7+bDB6e9XXnnljPdJ7UDdIaBiWXeuhbbkNBNg5HbHHXeYd2aMfFg4uHOQ5wDMFZgUG4olpz0pcHbEktHR9ddf71EsKVpcSMQpRU6psi2MJPnDCI/RJt/PWe/zXCNLRlMvvfSSiRQZ5Vpi6hpZcrqYkSrfsXKalZ+28B0iBWvx4sUnXQVrGtZV/Ci8nuwvu+wyMHJl1PrAAw+YqJHvPQ8dOmSEkG23WPJEVv2WGFpiyfejLHyvSnFnv63p4CVLlpgHC4rlyy+/bKaw+TDBd8A8JiUlxUSi/DenyrUoAScIqFg6QVHrqBcEONBb0QgFjVOenK7kQMx3dRSBQIglhYPvKxlRcjqWkSunKYcMGWLEj9OgFHG+m3OPLBnh8v1pq1atjC2nJv/v//7vpGnYJk2amMiZgsL3few3I8Vdu3aZ81YXWbqK5a9//WuP9pwypZiy8D0rxTIuLs78m0LJqJTvabmwiQLKCJFl6tSpJhp0F0su5GFkz/eevCZ8v8sImYViOWPGDBNxsy/8lITiyBkA/veaa65RsawXd2bd6ISKZd24DtqKOkKAi1A43Wh9xM9mZWVlmdWfHODdxZJRDKMxaxrW03eWXM3JwZ4LfDxFllxow9WtBw8eNO/nGDny3SPrpji++uqrRnhYGHkyuuPf+O6Ui3MonpzitaZIGSHfc889pj737yytbxUt7HwwYCTId321iSX/XpM9F+RwStUqhw8fNvxyc3PB6dW//vWvZkGOVawonf/vLpb8HRlwoQ8Fk4X8uAqZwsuo1b0tZMIVt2+99VYd8SptRn0goGJZH66i9sFxAoyImExg69atdeoTBLaLwkYB91Q4NUzBZIRZU6E48x0tP7Wo7fvR6uqpyZ5/4wIk1s2o3LXwQYTJHziVygQK3nziwRW0bCs/Q+F0sXthn8mGK2z52Ul1xzjuJFphUBFQsQyqy62dVQJKQAkoATsEVCztUFMbJaAElIASCCoCKpZBdbm1s0pACSgBJWCHgIqlHWpqowSUgBJQAkFFQMWyDl5upvYKxHZOPA+X2Hu79ZQ/UHGxBxdnWKswubqUWWcC0X8n+nOmtdeJPtf1OuhT/DzlTPGh08WTjFh8+RbV32ydHpOcrC/oxZIOw4wnroVLz7nMncvSP/3004D6Mpfcc+m99VG2Eyfn0nymSONnDczwwuwzXIXI3zN3KZfy11Rc7Z1oj1UHd9Zgxhqu7rz55pvN1lH8sPzJJ580qdX4uYWnHTL4nSA/J2CKOTuFqyt5Hn4Uz09CXIu3/XVtL79XDFThqlAmRmCqOikH9za7+keg+uP0eZiMgfcPEzM4WbjCmJ+r8HtXJqyvy4WfHvHzHObq9VSmT59uViTz++Gairu/0UeYoMMfxdsxyfXc/HSIn1Gxz+6Fn0xxdbQTW7mpWP5XLJlfcsGCBYY1P9qmYPGDbytptj8co7o6/SGWzBbz3XffmU8JKJQc5MePH++1WLraO8mBOVW5cwfr5xMgc35ydw5+1M69I/khu/t3f9b5+bE608tZqeh8bZeVxo7fIe7evfskc2/6y11DXNvr6/klx/M7UH4TeeWVV5rdNyQc3Nvh6h+SNp5OW3+JJaMqpt5jBiF+z1qXC8cz3lPMSCUVS1d/48NZXRNLjiMUy+q2tVOxdNBLrciST+muHzFPmzYNHTp0ME9Q/NibF4Q5LzllwQGakQ8LP7Jmcmjm92RGkj//+c/mqbNLly4m3VhsbKz59ov18ymPAzEzqnCAZlTElFzMiPKrX/3KfGC+d+9ek5ybT8bXXXedOZbTlPwWjXlCN2zYcFLveeOyPWwfM6QwQmMGEx5PUVy9erVJRs3sJhxc+e0b043xaYzp05gthvUz0vrmm2/MB+Du0zKzZs0y9vzwnZEo/83cnhw8+EE+E2WzMErjwM3f8/tE3lT8YJ43m5XejcmyeV7uTMHf80Nzpl2js3NKk6y4ywZFk4x5LvLhR/nMSEM+TIHGFG/8aJ/Rg6frQ/ae2suMNPzAn1N1/GB+3759J7h601/227W9TBnnzfVmUgK2mw8v7du3NzMYzElLVhRgfpTPpOos/C8HO/oWHxo408Hz0Dfpt/zYn6ntLA78Ha9Bx44dT+yEwiiI0+w1XTer48yg4+of//rXv6qtj9Pk7oW7f9DveO35JM8tstguPu1T2Pm95eTJk3HuueeaQY3Xr7r7hpmH2Ad+40of5u4mzGTEQt9mSkKLB8/B70N5TmYAYgTE9IAUMh5TXWTJNjBjEf2Obbr//vtP8XdyJFdeH/aVPsd7htEJsymRKe9nZhCydl/h33jPs2+e2smZHT6I835r3LixqZv3gqvvsZ88jpy4FRuzFfEhl2MK7wEWJo+3Mk1V1x/eV+wXC8cLpmvkcewP721mdaIvMbJkSkVmRSJvtufBBx886Ztb2rj6G32CMzpMZ9imTRtjS58mFya+Z9pEJoywNguozi94z3ka2xhZehqTyJ1izYiZiSk4Nu7cudOMfZZYkivZkDG/RbayR2lk6YBoWmLJlFnvv/++ufEoYrwQvBFvv/12k1KLQkmxoKhw2oKRJyNR3jA//vijSUFGx+cNzgvK3zODCh3txhtvNDcyBz6KIEWNTkahmzt3rhksmYCaDkhxYRt4HB2HNzRFlvkx+aRoiY7VddrzRqbYsj28GSk0HGA4NcipRt58FB1OxU6YMMGIEx2cDwcUGjo6ObDu6jYHZjtoz6wonIZi29kvRn+8OfjAwPylzD7DczNaJDc+OPBmZ6Jsa9qGQseNlSlQZEhn5k1GJ+fDABlz0Gb7OQ3LBxAeR55Mx0Z7PhxwD0lrGtbT9WG/PLWXok172pI/ZxGs4k1/ycpqL9vMaNib682Bl+JPoeP14ADI603xZiFHPrBwAOWDDvvN9nDAo2BywCBj+ih5c1CwOFB86GvsM6fZee3pU8xh64mDa0IA5lh19Q/6c3X1WRtjW7z4/2wX28rzUcT4+oJtZh85aNF36FsctPkQV919w6iNPkv/oiBwVueSSy4x4kM/5ZQ8hYOs2TY+OHAmhgJKfyMnDqR8AOXUvrtY0k/ZBl5z/p3t433PNrkWig37xOt6/vnnmwdAKwUhOfIhhg909Flr5onXjw/A1v1TXTspTnwA5wMRk/VzVoPXyUpQb7XBOo4MeV4+hFB8+IDJa8TZBD74euoPxxWKOe9pvubgfUoWFBFOu5Iz+fE6sT1M9MDdWnhP8iHXelhjezj17O5v9F/y49hD/+WDB/3W2pSbDyu8h/nAUp1fsL+exjZrqzn3MYlZqNgnjpO8LvR5PlySAa+fJZZk1KxZMzNG8B7h+MSxS8XSQbGsrioO8hQW7nrPG9x6ipw/f77JEMKBgI7FHSToIHwKoyPxyY2/p4PSqVj4ZMgLyUGQYmntaE+h4HFWkmhuDsz6KJZ8l8qbhcezfl58btLrWnjDc+DnTcR28qmN/+WgwxuFNxUHXToMB1P3aVje4BQgFk4r8jgrUvQkHpzC5ROmleqNEQTPw5uNfyMr5uXkwwYjiOrEksdTcDnY8+GDDyFsM9vCQYJ9oFhysGbScCt9Gh9E+NTNyIw3DOvxdH3ImINbde1lrlXeUNVNw7qLZXX2HFys9nLA8fZ6s/30ET5Z810pBxxyJCeKJvvCAZdbXZELIxy+x6XvcZDkce7TYpZY0u84mFhixutIfhR2TxzoD67F1T881ef+vooPNhRd690XfZrnpU/SN5kvlg90fMDhMUwpWN19w4ie0TnbT77kwXuAD3Uc9DhDwjopHLzuHCT5AEqBZ1Rk5bW1ksG7iyUjPz688f5lth/OrjAy4YOoa+F9St/lQwYL+8fNtlm/JZbWdmqMnDloU8h5HdhfT+3kgM0HScuX+ZDGiM59+pBiyZy5vNYs9AneE2ynlSCf9xdnEDz1x3Ualtdxx44dJ0SZjCmQ3ErN9Tx8iKCYurfH3d/ov7TlrAivFf/OBwqKJcctXhteo5r8wtPYxnuvujGJ1595mi0mHC85A8FZC/oSxZIPBayXD1qcaWBwwfuI10zF0kGxpHNx8OQNyiiSURGFjYJDYXAvjHj4VMhBjzewa2EkR8Fz3TSXTkCn5w+fhOlcLLxxGdVYUyx01D/+8Y/G6XhzUojYJk4v8SmYN5Jr4bQVBxkOUBwcKaYcFDgIcIqFg5M1+FcnltbCH9bJp322z92x3MXDEnra8CmZx1OQGQFzELGSefOpkk+xrmJJx2bk5K1YcjqJEZvFx+q7tbCFjDxdH4o3b5Tq2ssne2/Fsjp7Tm9ZYknG3l5v9wU5nJHgwMK2Wg9VHPzZNg4GnJHgkzvFhgN2TWLJBzIOFBy0WMiGERDb5okDH448iaWn+twXn5GFa65X1mflyrX8ilOKlgjy/qjuvmFb3RO5057ix4GSP+6F0TMfqiiojF5ZyI/p9NzFktOGXMzGhw62j4X3sLtY8h5gHl5rqtOa5nMVS/o9+01hZdTLBxIKmLXXZ3XtpD0fjPr27Wv+zOtL8XRvJ+9xPkRy8GfhebgNGR8OGEmxv/wv729P/XEVS15H2jMycy3u5+HDA8cT9/Z4ejhjXby/ef/TJ9zfHdbkF57GNvfFddaYxBkGPlzy4ZaF/mRt/s0HFoolZ8v4cMkHKM7gsNC/rTHqFOfx8Re6wOe/C3zc31nyKYsRDR2WgsKbhzcCC29Ezv1zioZPnxRFRnR0fE5v8OJQECmiPI5ix8UqvJgcVFzFklsMMXKw9vfjuwROqfLpk6LHi80bgyLDpysOlnyv5z7A8SmPgy4HIjo9I1G+p+IUZk1i6boa1luxZB8ZcbNYjsipNkbbFHVOiXHw5yDHCI6DkbVqlVET++CtWHKai5GkxYdTUrwu7B+f5Pl7T9eHURZForr2+iKW1dm7iiX9wNvr7b5Agj5FIbQGAQ5sHOz53odPzXznY80KcDCuSSw5Fcgna0ZyLPQ3RoEUc08cahJLT/VZe3FaPmhNqVHsWPgqgAM5rz+vl7XhM9+Jc3sw+kZ19w2jH15T+g/fSbHw3uKDH6eceX9ZUQ+jBkZw9F+eh/cXrwELH9o4U+E+6POhilN0fD/MyIpTfYwg3cWSDHkvcJqdhdeI9bmKJQXVigw5HczIj/cupx89tZNi7rr6tCaxdD2OvsaHAd7XrmJJMffUH1ex5EOd9ZDO/pAxxYSi7Xoeb8XSdYFPTWLpyS8Y6Xoa2xiNVjcmURw5DnDGgsXiwHGSaxsolnzfSl9ggMOpaBb6Bf1YI0sfnwyqO9zTAh9eAF5Q3qy86Ixu+GTMp1sKEC8Gp8boiBysKQJ8UrNWoNEheOEoXrzJGGWxPr6TdBVLDiaMYjlAchqUIk3HoBMwWuMNwXo5yDLK5BO5+0o8a9Nino+DK28MCjQHHb7zdBVLnoPOw8HHfZm2RCwpWlw4wXby3RrPQUdlm8iGPxyEOPCwbd6KJaf3yIes2S++ryB7Rv3W9KOn68Pr5Ekk+G6NUTgHbdq7FvdIujaxpPB6e729FUtrOouDAgdEaxsqiiWnqvlD8eRTtMWBPkuu9BNOZXHQofjSB70VS1f/8FQfp/5dCwWC05p8B8anf06B8b0j/Y9izPdNjJQ5fcdIiO2q7r5hPzgtTZYc8PgAQSHjNeJAyb/Tjyyfor+RAevkQwHXGfA9mbXDi7tYciCmCHOWgv7K6UhO9/L6uha2gdOjPB/fS5IffdpdLBlN8oGEhQ8AfDjjTIindlKMnBRLttNTfziOsA8/+9nPzHhFPrw+fAfJ1wp8+KYPeSOW7v7mrVh68gv6rKexjesuqhNLjie8XnxNxdc8nEHhLBavgWtEy2vMB3aOt3wYoIDqO0sHhJJVWGJpvZeyquXTOd8/UCDpULwhuRqThe8hKXqM8PjUTMFj4UWiHQccHs95fU73cBqN0608lk9ifAK1pmEpHFzeTMdm4S7yPB8HS2u6hREj66BIUAzdizXdxnebFDyei5EJBxIW16kNS1j5pMqIw90xKXBWZGKdx7Lnk3B1gy7PS/GikJER+0wWjCK4kIWDGx8A+Hve3Lxh3cWSU1k8jyVM1jtL3hiMFPjkyEJ7tps3giUSjD6ruz584vfUXmvhDM/r/t7Sm/5S+F3b6+319lYsGbFTIC2fo7hzKorREKfUGDkwIuVAYXHgFJoVUVj+yMGbPlUTB1d/cvUPDlzV1ccHMNdCQWHUxmtsnZeDFafvrMiWERwFk9OLjEz5pF/dfcNpVfqNVciZgzMLfZavSFh4T3A2iOdlZM9Nr/mgavkI2VhRiFUXWVh18e/0WT78si2uETbrowhSoHke3udkyEjf/TtLRn18jeIabXtqp/t3jbyfeC73drofx3PwOvBau0ZUHEs89YezL3z9wWieETJ9ie9WeQ8yqua45n4eijnt3NvD95qu/uZJLN2/d/TkF/RlT2NbdQ/wHJP48MRxjXWyD/zheMoHXtfzWuMrx0vr2vG9pfuYZkc+gn4a1ltovBEpYHxy5rSQa+Hgz0HAfUskXlg6Gp9e3T98dz8vl+wzirLm2q2/8+mJdVCAncpIwjr5ztWXzB3ecOJUFRc58SncvR8cyLlYyXXVqTd1WsdQLBmtePrusqbrU9N5+P6KkY8TxZfr7c35+CDF6ImDG689fYwDNxezsL8UJ2sBmVUfH/644IgPK/RTO9mZXP3D2/rYVoo5Izs+6HhzXk/3jbX1F+8n92vDVxac4nXfOo08uOCFNu5MXFnTRylQZMo2sj6uZne9F/iQzIc8noOvFijyfEBg5Opt8dROb+29Pa6m/tBf2H5ruzJG/3xAsHMPevK32tpZk1/YGds4xc1ggtfG03iYmJhoZjr4Ptr9lVVt7a3p7yqWEnpqqwSUQL0jwGlmrhtgREJB5yIrRrzu0/X1ruPaoRoJqFiqgygBJaAEXAgwGuLrAC4g4loFfhbCd2xagpuAimVwX3/tvRJQAkpACXhBQMXSC0h6iBJQAkpACQQ3ARXL4L7+2nsloASUgBLwgoCKpReQ9BAloASUgBIIbgJ+FUsuN+ZScteP6PnynCvMmHHGtXC5OI+racm5tYOH0588BLcLaO+VgBJQAkqgNgJ+EUt+m8WPZ/nhKz8etfJeWknD+Z0PRZMJlSmOzDbB/1o7YDBTAz/AtcSRHzfz7/wolXlFa/tmsbZO69+VgBJQAkpACfhCwC9iyewxTGjLNF3caYBiySiTOxowawfziDLbC7NiUEz5YS2F00oKzhyTzFTDdFX8IJip4JhZgh+QU0i1KAEloASUgBIIJAG/iKXVAaYAY7YNiqW1iSjTnLEwx6O1iwDTOTEPpbVFEXMRMv0cd1mgkDJtHFOeMcWZU1lsAglZz6UElIASUAJnNoGAiSXzd3ITUWs7K+YrZPo45itkzlNrex2myqJYcgcCRpZMjMvco0yVxt0+2rZtaxI1M+JkYf5D/riW/fv3m8hVixJQAkpACdQfAt+XJOGHkgS0iM5Fs+h8v3WMW6+5l4CJpbX/GLdq4vtHZsJnYeTInKhMMcV3nYwymXzZWujDPH/WPo5Mzsw93ZgUl7tyeFoMxCTU7nvu+Y2qjYqZSqu6i2GjKr+YaPtkWJWf8pMRkFnXZ/+Lv+haNDz7MuTvWo6iHz6VgfJg7YlfwMSS7eI7S4okt+DhbhLcwoWLetg4Rp58V8kdJazdsGnDzPzclYPvP5l+ihnvuYsFj/c0JatiKfOh+nyzych4Z638vOPk6Sjlp/w8EUhscyMaNG2LvC8Xojjj5A0tZNT+Z33axJIbFlsb2zIy5EIeFu56wR2uueKVU6qMMLmtCrdd4dZALMzcz3eZbDynaxmNcSsrroq19pGrDpCKpcxtdLBSfjICMmv1P+XniUBSu58jOv0C5H72H5Qc/kYGqi5FltW1hdOqFDz37azatGljtnVy3VKFW8ywcPWsL0XF0hdapx6rg5XykxGQWav/KT9PBJIv+xWiGrXAkU9moSx3vwxUXRdLv/TOrVIVSxllHayUn4yAzFr9T/l5ItDo8rsRmXgWsjfNRHnBIRkoFUtAxVLmQzpYKT8ZAZm1+p/y80Qg9aoHEB6bisMfv4qKopOzwMmo/c/6tLyzdKrxvtajYukrsZOP18FK+ckIyKzV/5SfJwJp1wxCWIMEHFrzD1SWFclAaWSpkaXUg3SwkhFUfspPRkBmXZ/9L73HUIRGNEDmigmoqqyQgVKxVLGUelB9vtmkbLyxV37eUPJ8jPJTftUSCAlF417DEBIahoxl42SQarDWaVi/ofW9Yh0MfGfmaqH8lJ+MgMxa/e/08AuNiEZ6j8dRVXkcmSvGyxqhYvkTAX1nKfMjHQyUn4yAzFr9T/lVRyAsOh5pXR9BZXkxDq2eLIOkYqli6YQH6WAlo6j8lJ+MgMy6vvpfeGwKUq96EMeL85G17hUZJBVLFUsnPKi+3mxOsPGmDuXnDSXPxyg/5VcdgYiEpki54h6UH81C9sY3ZZBULFUsnfAgHaxkFJWf8pMRkFnXV/+LSj4HyR1vR1nujzjyif92ldIFPjL/c9S6vjqzo5BsPPkF6vy1nUevb22Eav678lN+1RGITjsfSZf+AqXZe5Hz6VwZJBvji193HfFbb2qpWBf4yMjrYKX8ZARk1up/yq86Ag2atEFi25tQnPkV8r74QAZJxVKnYZ3wIB2sZBSVn/KTEZBZ11f/izm7AxIuug5FP36G/K+WySCpWKpYOuFB9fVmc4KNN3UoP28oeT5G+Sm/6gjEtuiMuPO7oXDfZhz9eo0MkoqliqUTHqSDlYyi8lN+MgIy6/rqf3HnXYPYc6/C0W/WofC7DTJIKpYqlk54UH292Zxg400dys8bShpZyigFH7/4C3uhYfOOKNi9Asf2b/MXPuhqWL+h9b1iHUx9Z+ZqofyUn4yAzFr97/TwS7i4D2LOugR5Oz5E8cEvZY3QyFIjSyc8SAcDGUXlp/xkBGTW9dX/ktrdiuj0C5H7+fsoyfpaBkkzD/3+AAAgAElEQVTFUsXSCQ+qrzebE2y8qUP5eUMp+KYRZVS8t66v/pfc4ZeISmmJnK3/RmnO994D8fFInYb1EZg/D6+vzuxPZjoN6xxd9T8ZS+V3evg1uvwuRCY2Q/amf6K8IFPWCI0sNbJ0woN0MJBRVH7KT0ZAZl1f/S/lyvsREZeGw+tfQ8WxHBkkFUsVSyc8qL7ebE6w8aYO5ecNJZ2GlVEKPn5pXQYiLCYRh9ZORWVpob/w6WpYv5G1UbEOpjaguZgoP+UnIyCzVv87PfzSuz+G0MgYZK58EVXHy2WN0MhSI0snPEgHAxlF5af8ZARk1vXS/0JC0LjnrxESFoGMZeNkgGqx1gU+fsXrW+X10pl9QyA6WvmJ8HmcZpLV6py1Xl8Zy/rILyQ8Co17PgFUVSFj+fMyQCqW/yOgu47IfKk+3mwyIr5ZKz/feLkfrfyUnzuBsOg4pHUdjMryEhxa/ZIMkIqliqVTHqSDlYyk8lN+MgIy6/rof+ENGyH16gE4XlKArI9elgFSsVSxdMqD6uPN5hQbb+pRft5Q8nyM8lN+7gQi4hsjpfN9qCg8jMMbZsgAqViqWDrlQTpYyUgqP+UnIyCzro/+F5ncHI063oGyvAM4suVfMkAqliqWTnlQfbzZnGLjTT3KzxtKGlnKKAUXv+jU85DUvi9Ks79Dzqfv+QudqVdXw/oVr2+V62DqGy/3o5Wf8pMRkFmr/wWeX4MmFyOx7c9QnLkLeV8skDXgTIksQ0NDkZCQgNzc3JOanJSUhPz8fFRWVnrsSmxsLEpKSlBRUVFjd3U1rMyXdDBQfjICMmv1P+XnTiCmWXsktO6NogPbkb9ziQzQmSCWkyZNwvnnn28EMSIiAg8++CDCwsIwY8YM87uYmBgsWrQIY8aMwZQpU2CJ4+DBg83f58+fj/79+yMvL0/F0o/uooOVDK7yU34yAjLr+uh/DVtcjvjze+DY91tQsGe1DFBdF8t27drh9ddfR48ePVBYWGiE79tvvzVi2bBhQwwYMACtWrXCrFmzMGzYMIwaNQp9+vTBvHnzMGHCBLRs2RJNmzY1Qlpb0ciyNkI1/70+3mwyIr5ZKz/feLkfrfyUnzuBuFZdENvyahzd+zEKv10vA1TXxTI5ORlLlizBtddei4KCAqxYsQLZ2dkmwtywYQPGjRsHTtFu3rwZAwcOxPjx41FaWmqEtHfv3pgzZw769u2LsrKyWkGpWNaKqMYDdLBSfjICMmv1P+XnTiD+gp5oeE4nFOxeiWP7t8oA1XWxZPvmzp2LtLQ0824yPT0d27dvR+PGjY2ITpw40XTBEssdO3aYyHLp0qUYMWIEMjIy0Lp1a7Rt2xZTp041ESfLoEGDzI97GTt2rF+BauVKQAkoASUQGALfFDVCZlkczo/JRnqk/3YcsXoze/bsUzoW0rFjx6rAdPens9x6661mSrVTp07YuXMnmjdvbqZlhw8fjvDwcBNldu7c+cRCn8TERMycORPTp0/HkCFDMHnyZIwcORLdu3f3uBhII0vZFdUne+UnIyCzVv9Tfu4EEi+5BQ0aX4Tc7fNRcmi3DFBdjyy5eGfVqlXm3SSjxIULF+LJJ5/EpZdear5r6dWrl3lX2a9fP3Tr1u1Ed7goiMc2a9YMXbt2xdChQ7F48WJzvKcpWRVLmS/pYKX8ZARk1up/ys+dQHKHfohKaYWcbbNRemSfDFBdF0u2j9OnFEdGkBS0Rx99FPHx8WZKle8mQ0JCMHr0aCxY8NN3NC1atDDvMnnzREdHg6FxamqqWRz03HPPeeyyiqXMl3SwUn4yAjJr9T/l506gUac7EZl0NrI3v43y/IMyQGeCWLKNKSkpiIyMxMGDJ3e4TZs22Lt3r/mO0ip8v8mSlZXlExwVS59wnXKwDlbKT0ZAZq3+p/zcCaR07o+I+HQcXv86Ko4dkQE6U8TSr738b+UqljLKOlgpPxkBmbX6n/JzJ5Da5WGExyQh66NpOF5yVAZIxfJ/BFQsZb6kg5XykxGQWav/KT93AundHkVoVENkrpqEqopSGSAVSxVLpzxIBysZSeWn/GQEZNb10f8a9/o1QsIikbH8eaDKvx9waCJ1mf85al0fndlRQDaf/ALZhprOpddXdiWUn/JzJUCRpFiyZCwbJ4PjhbWKpReQAnWIDgYy0spP+ckIyKzV/wLLLzQqFundhqCqogyZq35KXuPPomLpT7o+1q03m4/A3A5XfspPRkBmrf4XWH7hMclI7fKQWdjDBT7+LiqW/ibsQ/16s/kAq5pDlZ/ykxGQWav/BZYfPxnhpyP8ZISfjvi7qFj6m7AP9evN5gMsFUsZLOWn/BwnIKvQ1/GPyQiYlIDJCJiUwN9FxdLfhH2o31dn8aFqRw7V9skwKj/lJyMgs65v/heV2grJ7fuZNHdMd+fvomLpb8I+1F/fnNmHrjtyqPKTYVR+yk9GQGbtq/81aNwaiZfcbBKoM5G6v4uKpb8J+1C/r87iQ9WOHKrtk2FUfspPRkBmXd/8L+asS5Fw8fUoOvAF8nculsHxwlrF0gtIgTqkvjlzoLhZ51F+MuLKT/nJCMisffU/bvrMzZ+56TM3f/Z3UbH0N2Ef6vfVWXyo2pFDtX0yjMpP+ckIyKzrm//Ftrwaca26oPDb9Ti692MZHC+sVSy9gBSoQ+qbMweKm0aWzpBW/5NxVH6B5Rd/QQ80POdyFOxZjWPfb5Gd3AtrFUsvIAXqEL3ZZKSVn/KTEZBZq/8Fll9C6+sR0+xS5O9cgqID22Un98JaxdILSIE6RG82GWnlp/xkBGTW6n+B5ceVsFwRm/fFAhRn7pKd3AtrFUsvIAXqEL3ZZKSVn/KTEZBZq/8Fll9S+76ITj0POZ++h9Ls72Qn98JaxdILSIE6RG82GWnlp/xkBGTW6n+B5deo4x2ITG6OI1v+hbK8A7KTe2GtYukFpEAdojebjLTyU34yAjJr9b/A8kvpfB8i4hvj8IYZqCg8LDu5F9Yqll5ACtQherPJSCs/5ScjILNW/wssv9SrH0J4w2RkrXsFx4vzZSf3wlrF0gtIgTpEbzYZaeWn/GQEZNbqf4Hll9ZtMMKi4nBo9WRUlhfLTu6FtYqlF5ACdYjebDLSyk/5yQjIrNX/Asuvcc8nEBIehcwV41FVeVx2ci+sVSy9gBSoQ/Rmk5FWfspPRkBmrf4XOH4hYeFo3HMYEBKCjGXjZCf20lrF0ktQgThMbzYZZeWn/GQEZNbqf4HjFxoZg/Tuj6HqeDkyV74oO7GX1iqWXoIKxGF6s8koKz/lJyMgs1b/Cxy/sJhEpHUZiMrSQhxaO1V2Yi+tVSy9BBWIw/Rmk1FWfspPRkBmrf4XOH4RcWlIufJ+VBzLweH1r8lO7KW1iqWXoAJxmN5sMsrKT/nJCMis1f8Cxy8yqRkadboL5QWZyN70T9mJvbRWsfQSVCAO05tNRln5KT8ZAZm1+l/g+EWltERyh1+iNOd75Gz9t+zEXlqrWHoJKhCH6c0mo6z8lJ+MgMxa/S9w/KLTL0JSu1tQkvU1cj9/X3ZiL61VLL0EFYjD9GaTUVZ+yk9GQGat/hc4fjFnXYKEi/ug+OCXyNvxoezEXlqrWHoJKhCH6c0mo6z8lJ+MgMxa/S9w/Bo274j4C3vh2P5tKNi9QnZiL63rjFiGhoYiOTkZ2dnZJzU9KSkJ+fn5qKys9Nil2NhYlJSUoKKiosZuf/LJJ+jUqZOXaAJ/mN5sMubKT/nJCMis1f8Cxy+25VWIa3UNCr/bgKPfrJOd2EvrOiGWzzzzDLp06YKysjJERUXhwQcfRHh4OGbMmGFEMiYmBosWLcKYMWMwZcoUWOI4ePBg8/f58+ejf//+yMvLU7H08sLbOUwHAzvU/mej/JSfjIDMuj75X9z53RHb4goc/XoNCvdtloHx0rpOiOXmzZsxYsQIrF69GitWrMCCBQvQqlUrNGzYEAMGDDD/njVrFoYNG4ZRo0ahT58+mDdvHiZMmICWLVuiadOmRkhrKxpZ1kao5r/Xp5tNRsKetfKzx82yUn7KzyKQcFFvxJzdHvlfLUPRj5/JwHhpXSfE8uWXX0a7du3MdCunXfv164eXXnoJGzZswLhx48ApWgrqwIEDMX78eJSWlhoh7d27N+bMmYO+ffuaqLS2omJZGyEVSxkh5af8ZvsTgaju+vSwkdj2JjRo0gZ5X3yA4syvRFy8Na4TYrl27Vojdj/88APatm1rosRHHnkES5YswcSJE01fLLHcsWOHiSyXLl1qotGMjAy0bt3a2E2dOtVEnCyDBg0yP+5l7Nix3rLR45SAElACSqAOEvjqWBqOlMfg4oaHkBzh/+25LASzZ5/6MBTSsWPHqkAw4oKbadOmoVevXigoKACjTL6TPHr0KAoLCzF8+HDz/pJRZufOnU8s9ElMTMTMmTMxffp0DBkyBJMnT8bIkSPRvXt3j4uBNLKUXdH69GQqI2HPWvnZ42ZZKT/lZxFI7ng7opLPwZFP3kFZ7o8yMF5an/bIMiEhAcuXL8dDDz2E7du3m2nV3bt3myiTjaOI8l0lp2a7det2oluTJk3CwoUL0axZM3Tt2hVDhw7F4sWLzfGepmRVLL30Cg+H6WCl/GQEZNbqf8rPIpByxb2ISGiC7I1vovxolgyMl9anXSzZTr6X7NGjB44fP46ioiKzqCcnJ8dMqfLdZEhICEaPHm0W/rC0aNHC2LDx0dHRYGicmppqVsU+99xzHruuYumlV6hYykApP+XnFwKySuvTw0bq1QMQ3rARsj6ejuNFNX8FIaP2P+s6IZZsDj8PadKkCfbu3XtS39q0aWN+x+8orZKWlmb+mZXl2xOFiqXMberTzSYjYc9a+dnjZlkpP+V3QgO6PoKw6HgcWvMPVJYVycB4aV1nxNLL9ooOU7EU4TORfHUvuGW1Omet7ZOxVH7KT0ZAZu2L/6X3eByhEdHIXDkBVcdrTkYja1UdjCyd6lBN9ahYyij74syyM9mz1vbZ46aRm4yb8gssv5DQMDTuNQwICUXGsnHOnNyLWjSy9AJSoA7RwV5GWvkpPxkBmbX6X2D4hUY0QHqPoaiqrEDmigmyk/pgrWLpAyx/H6o3m4yw8lN+MgIya/W/wPALa5CAtGsGmXeVfGcZqKJiGSjSXpxHbzYvINVwiPJTfjICMmv1v8DwC49NRepVD6CiKBeHP35VdlIfrFUsfYDl70P1ZpMRVn7KT0ZAZq3+Fxh+kYlnodHld6O84BCyN82UndQHaxVLH2D5+1C92WSElZ/ykxGQWav/BYZfVKNzkXzZbSjL3Y8jn8ySndQHaxVLH2D5+1C92WSElZ/ykxGQWav/BYZfdPqFSGp3K0oOf4Pcz/4jO6kP1iqWPsDy96F6s8kIKz/lJyMgs1b/Cwy/Bk3bIrHNjSjO2Im8LxfKTuqDtYqlD7D8fajebDLCyk/5yQjIrNX/AsOvYfPLEH/htSj64VPk71ouO6kP1iqWPsDy96F6s8kIKz/lJyMgs1b/Cwy/2HOvRNx5XVH43SYc/Wat7KQ+WKtY+gDL34fqzSYjrPyUn4yAzFr9LzD84s7rhthzOxuhpGAGqqhYBoq0F+fRm80LSDUcovyUn4yAzFr9LzD84i+6Dg3P7mCmYDkVG6iiYhko0l6cR282LyCpWMogKT/l5zcCsoq9Hf+4uIeLfLi4h4t8AlVULANF2ovzeOssXlTll0O0fTKsyk/5yQjIrOuL/yVd+nNEp11gPhvh5yOBKiqWgSLtxXnqizN70VW/HKL8ZFiVn/KTEZBZe+t/yZf9ClGNWpiEBExMEKiiYhko0l6cx1tn8aIqvxyi7ZNhVX7KT0ZAZl1f/K/RFfcgMqEpsjf9E+UFmTIoPlirWPoAy9+H1hdn9jcnT/UrPxl55af8ZARk1t76X+pVDyI8NgWH17+GimM5spP6YK1i6QMsfx/qrbP4ux0qRv4hrNdXxlX5KT8S4PZc3KYra+1UHC8tlEHxwVrF0gdY/j5UBwMZYeWn/GQEZNbqf4Hhx42fuQF05qqJqKook53UB2sVSx9g+ftQvdlkhJWf8pMRkFmr/wWAX0goGvcahpDQMGQsGyc7oY/WKpY+AvPn4XqzyegqP+UnIyCzVv/zP7/QiGik93gcVZXHkblivOyEPlqrWPoIzJ+H680mo6v8lJ+MgMxa/c///MKi45HW9RFUlhfj0OrJshP6aK1i6SMwfx6uN5uMrvJTfjICMmv1P//z4ypYroY9XpyPrHWvyE7oo7WKpY/A/Hm43mwyuspP+ckIyKzV//zPLyKhKVKuuAflR7OQvfFN2Ql9tFax9BGYPw/Xm01GV/kpPxkBmbX6n//5MXMPM/iU5f6II5+8Izuhj9Yqlj4C8+fherPJ6Co/5ScjILNW//M/v+i085F06S9Qmr0XOZ/OlZ3QR2sVSx+B+fNwvdlkdJWf8pMRkFmr//mfX4OmbZDY5iYUZ36FvC8+kJ3QR2tHxTIyMhJlZYH7SNTHvuKTTz5Bp06dfDUL2PF6s8lQKz/lJyMgs1b/8z+/mLM7IOGi61D042fI/2qZ7IQ+WovE8vrrr8fvf/979O3bF++//z4aNGiAZcuW4emnn/axGYE5XMVSxlkHA+UnIyCzVv9TfrEtOiPu/G4o3LcZR79eIwPio7VILNeuXYvo6GisWrUK1157LUpKShAVFYUuXbrUyQhTxdJH73A7XAcr5ScjILNW/1N+ced1Rey5V+LoN+tQ+N0GGRAfrW2LZWxsLFavXo1XX30Vt99+O/j/Dz74IN58802MGDECK1eu9LEp1R+elJSE/Px8VFZWeqyP56ZQV1RU1HhOFUvZJdHBSvnJCMis1f+UX/yFvdCweUcU7F6BY/u3yYD4aG1bLHmeDRs2oKCgAI0aNcK+ffuQk5ODyy67DL1790Zubq5XTWnbti1eeOGFE8fGx8cb28GDB2PGjBlGJGNiYrBo0SKMGTMGU6ZMMcJMceQx/Pv8+fPRv39/5OXlqVh6Rd3eQTpY2eNmWSk/5ScjILOuD/6X0KYPYppegrwdH6L44JcyID5ai8SS7yb79etnTslocuzYscjMzMQtt9ziYzN+Orxdu3aYNm0abrvtNowcORINGzbEgAED0KpVK8yaNQvDhg3DqFGj0KdPH8ybNw8TJkxAy5Yt0bRpUyOktRWNLGsjVPPf68PNJiMgs1Z+yk9GQGZdH/wvqd2tiE6/ELmfv4+SrK9lQHy0Foklz0Wh4vRnVlYWbrjhBixZssTHJvzv8IULF2LTpk0YPXo05s6dayLXcePGITQ0FJs3b8bAgQMxfvx4lJaWGiFlBDtnzhyzwMibVbgqlrYvjTGsDzebjIDMWvkpPxkBmXV98L/kDrchKuVc5Gz9N0pzvpcB8dHallhSyMLDwz2e6t577zXTs76UDh06mKiye/fuZoqV064U3okTJ5pqLLHcsWOHiSyXLl1qotmMjAy0bt0anM6dOnWqiThZBg0aZH7cC6NfLUpACSgBJXDmEdhe2AQFFVG4NC4DcWGlAe/A7NmzTzlnSMeOHas8tYQRWk2FYpadne1TR9544w3z/vGhhx4ydhTOwsJCDB8+3Agzo8zOnTufWOiTmJiImTNnYvr06RgyZAgmT55spm4ptp4WA2lk6dMlOeXg+vBkKiMgs1Z+yk9GQGZdH/wv5cr7ERGXhsPrX0fFsSMyID5a24osOS3KwuiPi2+effZZ7Ny50yzUadasGXr16mWiQ18KxfAvf/mLqZOFAsjGsS6+q+S70W7dup2octKkSeC0Lc/XtWtXDB06FIsXLzbHe5qSVbH05Yqcemx9uNlkBGTWyk/5yQjIrOuD/6VdMxBhDRKR9dHLOF7i2+yljJ7n11A1RpY8KbP1rF+/Hl9//TXuuusu047f/OY3uPvuu33+dISLdP79738bMSwqKjJ1cVUsp1T5bjIkJMS8x1ywYIH5W4sWLcy7TF58fufJ0Dg1NdWsin3uuec8MlGxlLlLfbjZZARk1spP+ckIyKzrg/+ld38MoZExOLTqJVRW+BaQyegJxJInplhSNDnlykU3XOxTVVVl3inyMxInSps2bbB3796TItW0tDRTNRcV+VJULH2hpZGljJbyU35OE5DVd+aLZQgaXzsMIaHhyFj+AlDl+dt7GanqrW1Nw1pVMRL87W9/i8aNG5voj4t6mKTgX//6lz/aKq5TxVKG8My/2WT9l1orPxlB5Rfc/ELCo9C45xNAVRUylj8vg2HD2rZYMqLklOf27dvx7rvvmmnQAwcO2GhC4ExULGWsdbBSfjICMmv1v+DmFxYVh7Rug1FZXoJDq1+SwbBhbVsseS5OwzKi5AKb2lLN2Wib4yYqljKkOlgpPxkBmbX6X3DzC2/YCKlXDzALe7jAJ9BFJJZccNOkSRMcO3YMhw4dMu8rWfj5Bz/7qGtFxVJ2RXSwUn4yAjJr9b/g5hcR3xgpne9DReFhHN4wQwbDhrVILJkowPqMxPXcN910k8+Lb2y03WcTFUufkZ1koIOV8pMRkFmr/wU3v8jk5mjU8Q6U5R3AkS2BXxcjEsuEhIRqxdLbJOqyS++7tYql78xcLXSwUn4yAjJr9b/g5hedeh6S2vdFafZ3yPn0PRkMG9YisUxJSTHfVl5zzTUmy862bdvw0ksvYffu3Taa4n8TFUsZYx2slJ+MgMxa/S+4+TVocjES2/4MxZm7kPfFT9/cB7KIxJIp6i655BLzrpIp5sLCwkz2nB49eniV2DyQHeW5VCxlxHWwUn4yAjJr9b/g5hfTrD0SWvdG0YHtyN9pf8MOuxRtiyU3ZV62bBm+//57k0mHYvn8888boXziiSfMStm6VlQsZVdEByvlJyMgs1b/C25+DVtcgfjzu+PY91tQsGe1DIYNa9tiyUTmy5cvx9atW/HII4+YU1vp7rjnJPO01rWiYim7IjpYKT8ZAZm1+l9w84trdQ1iW16Fo3s/RuG3gQ/GbIslL9vatWtNIvW8vDyUl5ebxATFxcU17vwhu9wyaxVLGT8drJSfjIDMWv0vuPnFX9gTDZt3QsHulTi2f6sMhg1rkVi2a9cOf/jDH3DOOeeY5AT81pI7j6xcudJGU/xvomIpY6yDlfKTEZBZq/8FN7+Ei29AzFntkL9zMYoOfCGDYcNaJJY8H3cFiY2NNYt8GGF62h7LRtscN1GxlCHVwUr5yQjIrNX/gptf4iW3oEHji5C7fT5KDgX+iwuRWN577714/PHH8fLLL2PPnj0YP348/vnPf5rPR+piUbGUXRUdrJSfjIDMWv0vuPkld/glolJaImfbeyg98p0Mhg1rkVjynSX3k+TCnoyMDCOUTLCuGXxsXAl43i/NXm3OW+lgJWOq/JSfjIDM+kz3v0ad7kJkUjOTvYdZfAJdbIultfnz559/bnLBsgwYMACPPvoo/vznP5/YqDnQHarpfBpZyq7GmX6zyXovt1Z+MobKL7j5pVzZHxFx6SYvLPPDBrrYFks2dOPGjSZzz6pVq0zidG76HBERgX79+mH//v2B7kut51OxrBVRjQfoYKX8ZARk1up/wc0vtcvDCI9JQta6V3C8OF8Gw4a1SCz5zvKxxx4zAmkVfnv51FNP2WiK/01ULGWMdbBSfjICMmv1v+Dml97tUYRGNcSh1ZNRWV4sg2HDWiSWPB93HbnjjjvAPLFvvfUW6moSdbZVxdKGh7iY6GCl/GQEZNbqf8HNr3GvYQgJi0DmivGoqjwug2HDWiSWV1xxBSZNmmSmYrdv345mzZqZFHjjxo2z0RT/m6hYyhjrYKX8ZARk1up/wcuPIkmxZMlYdnr0RSSW69atQ1RUFI4fP46dO3ea6LJp06a6GtamT+tgYBPcf82Un/KTEZBZq//5jx+nXzkNW1VRhsxVE2UnsmltWyyZjGDNmjUmkqRAUjDfffddPPfcc3j66aexdOlSm03yn5lGljK2OhgoPxkBmbX6X/Dy48IeLvA5XnIUWR9Nk4GwaW1bLHm+LVu2mFywJSUlZtcRZvFhdNm1a1fzu7pWVCxlV0QHK+UnIyCzVv8LXn4R8elI6dwfFceO4PD612UgbFqLxHLEiBFmey7mhbXK5s2bzbeWdbGoWMquig5Wyk9GQGat/he8/CKTzkajTneiPP8gsje/LQNh09q2WHJRz3XXXWfywV588cU477zzzGrTuXPn2myK/81ULGWMdbBSfjICMmv1v+DlF5XaCsnt+6H0yD7kbJstA2HT2rZY8pMRJiU4duwYevbsafP0gTVTsZTx1sFK+ckIyKzV/4KXX4PGrZF4yc0mgToTqZ+OYlss2djZs2fj3HPPxaeffopdu3aZd5YskydPrpO7j6hYylxMByvlJyMgs1b/C15+Mc0uRULr683WXNyi63QUkVjy/SQjTPeiidTtXUodDOxxs6yUn/KTEZBZq//5j1/Dcy5H/AU9zKbP3Pz5dBSRWN55553mO0v38s4772hkaeNq6s1mA5qLifJTfjICMmv1P//xi215NeJadUHht+txdO/HshPZtBaJpc1znjYznYaVodfBQPnJCMis1f+Clx+jSkaXBXtW49j3W2QgbFqrWNoE5w8zHQxkVJWf8pMRkFmr//mPH99X8r1l/s4lKDqwXXYim9Z1Riz57rNx48Y4ePDgSV1JSkpCfn6+SXrgqcTGxpokCBUVFTVi0MjSppf810wHA+UnIyCzVv8LXn5cCcsVsXlfLEBx5i4ZCJvWIrGkwA0fPhxMqL5p0yaUlpaalHdZWVk+Nef+++/HwIEDkZOTY96B/vGPfzSiOWPGDCOSMTExWLRoEcaMGYMpU6bAEsfBgwebv8+fPx/9+/c333zWVFQsfbospxysg5XykxGQWav/BS+/pPb9EJ3aCjmfzkVp9l4ZCJvWIrGcNm0aOnXqZE7NXUdatWpl/s3vLmuKBN3bylW1DzzwgEnG3qVLFx3wNh8AACAASURBVISFhZnMQMw/O2DAAFPvrFmzMGzYMIwaNcpsMj1v3jxMmDABLVu2NLlpKaS1FRXL2gjV/HcdrJSfjIDMWv0vePk16nQHIpOa48gn76Is9wcZCJvWtsWSUSWjyUOHDqG8vNzsY/nNN9+gX79+GDJkiMkb601JTEwEN4zOzs5Go0aN8N1334ER46uvvooNGzaY7b54Lgoqo8/x48ebCJZC2rt3b8yZMwd9+/b1avWtiqU3V8TzMTpYKT8ZAZm1+l/w8kvpfB8i4hsje+NMlB89JANh09q2WEZHR4NbdK1fvx4JCQlm15G1a9di6NChJkr88ssvvWoSI1NGqKxn5syZ+Pvf/252LOnWrRuWLFmCiRN/2o7FEssdO3aYyJLHMDdtRkYGWrdujbZt22Lq1Kkm4mQZNGiQ+XEvY8eO9apdepASUAJKQAnUDQJbC85CcWUEOsYfQIPQ8tPWKCbicS8hHTt2/CkdTw2FgpWcnHwicw8Tqh89etSn9HfNmzc3+WStRAbc3qtz585GBAsLC807UeahZZTJ31vTu4xIKa7Tp083kSyzBo0cORLdu3f3OAWskWVtV7Tmv+uTvfKTEZBZq/8FL7+0bkMQFhWLQ2umoLLsmAyETWvbkSXPd/bZZ5v3iFzgw0iTQsl3iowSvS2cYqUQvvzyy2ZBD5Wb7z+5SIiN69WrlzkHp3cZbVpl0qRJWLhwIZo1a2a2BGNEu3jxYnN8WVlZtadXsfT2qlR/nA5Wyk9GQGat/he8/Br3/DVCwiORufJFVB0/PZGlLbHs0KHDSdtyuV/Czz77zKcFPvfdd5+JDrmwh+8uOX1K4eWUKt9NMmIdPXo0FixYYE7VokUL8y6TjadIU2BTU1PNqlhuPu2pqFgG780m67kz1jrYyzgqv+DkFxIajsbXDgMQgoxl42QQBNa2xJKiU1OxkxuWEWaTJk1w4MCBk6pu06YN9u7de9Jm0mlpaeYYXz9RUbEUeApgHk6qm7OX1eqctbZPxlL5KT8ZAZm1J/8LjYxBevfHTETJyPJ0FVtiyU82GAV6Kk899RSKiopOV580svQTeR1MZWCVn/KTEZBZn6n+F9YgEWnXDERlaSEOrZ0qgyCwtiWWrufjAp/bb7/dfN7x3nvv+RztCdrus6lGlj4jO8ngTL3ZZL12zlr5yVgqv+DkFxGXhpQr70fFsRwcXv+aDILAWiSWXKnKnUdcCz8Z4acjdbGoWMquig5Wyk9GQGat/hec/CITm6HR5XehvCAT2Zv+KYMgsLYtltbnHFx8w08/uAL11ltvNanp7r77buzZs0fQLP+YqljKuOpgpfxkBGTW6n/ByS8qpSWSO/wSpTnfI2frv2UQBNa2xdLK4LNv3z6z8IOFmXcefvhh3HPPPdi9e7egWf4xVbGUcdXBSvnJCMis1f+Ck190+kVIancLSrK+Ru7n78sgCKxtiSUTn3OBD6dg+c6SKee44wcjy4iICPM9pKdvHQVtFZuqWMoQ6mCl/GQEZNbqf8HJL+asdki4+AYUH/wSeTs+lEEQWNsSS398OiLog9emKpZeo6r2QB2slJ+MgMxa/S84+TVs3gnxF/bEsf3bULB7hQyCwNqWWDJpQE2fjrz22msaWdq4KDoY2IDmYqL8lJ+MgMxa/c8//GJbXoW4Vteg8LsNOPrNOtlJBNa2xNI6X3x8PPhNJTPqWIUbON92221mb8q6VjSylF0RHQyUn4yAzFr9Lzj5xZ/fHQ1bXIGjX69B4b7NMggCa5FYWonU3c9//fXXq1jauCg6GNiAppGlDJryU36OEZBV5Gn8S2jdGzHN2iP/q2Uo+vEz2UkE1rbFMjIy0iRM50/79u3BfLBc5MOk6lzg48vmz4L2+2SqkaVPuE45WMVc+ckIyKzV/4KTX2Lbn6FBk4uR9+VCFGfslEEQWNsWS2tDZn4iws2YudXWn/70J7z44ot44oknfNp5RNB+n0xVLH3CpWIpw6X8lJ/DBGTVnakPG0nt+yI69TzkfvYflBz+RgZBYG1bLHlO7vLRtGlTzJo1C3fccceJZug0rL0rcqY6s73eOm+l/GRMlZ/ykxGQWXvyv+SOtyMq+Rwc2ToLZTn7ZScRWIvEkttncXutd999F48//ji4Qwjzw/KnLhaNLGVXRQdT5ScjILNW/wtOfilX3IuIhCYm1R1T3p2uIhLLKVOmmI2ZrQQEl156Kf7617/i3nvv1QU+Nq6oDgY2oLmYKD/lJyMgs1b/8w+/1KsHILxhI5NEncnUT1cRieWmTZuQm5uLAQMGYNSoUWZxD4ud/SwDAUAjSxllHQyUn4yAzFr9Lzj5pXUdjLDoOGStnYrjpYUyCAJrkViOGzcOPXv2PHH68vJyvPHGG3jllVcETfKfqYqljK0OVspPRkBmrf4XnPzSez6B0PAoZK6ahKqKUhkEgbUtseSiHqsweTojSX4qwq258vLycPDgQUGT/GeqYiljq4OV8pMRkFmr/wUfv5DQMDTuNQwICUXGsucBVMkgCKxtiaXmhhUQr8FUBwMZV+Wn/GQEZNbqf87zC41ogPQeQ1FVWYHMFRNkJxBa2xLLqVOngkkJPBV+Z3ns2DFh05w318hSxlQHA+UnIyCzVv8LPn5hDRKQds0gVJYV4dCaf8gACK1tiaXrOfn5yAUXXHBSMz7//HPN4GPjwuhgYAOai4nyU34yAjJr9T/n+YXHpiL1qgdQUZSLwx+/KjuB0Fokls899xx69+6NkJCQk5qhq2HtXRW92exxs6yUn/KTEZBZq/85zy8y8Sw0uvxulBccQvammbITCK1tiyXT3W3cuNGcfufOnSYvrFV+/etf6zSsjQujN5sNaBpZyqApP+XnGAFZRdWNf1GNzkXyZbehLHc/jnwyS3YCobVYLLdu3YohQ4YImxEYc31nKeOsYq78ZARk1up/wccvOv0CJLX7uckJy9ywp7PYFks2esGCBeD+lcuWLTspY8+kSZN082cbV1UHAxvQNDKSQVN+ys8xArKKqhv/GjRti8Q2N5rdRrjryOksIrHcvHkzOB3rXvSdpb1LqmJpj5tlpfyUn4yAzFr9z3l+DZtfhvgLr0XRD58if9dy2QmE1iKx/PnPf44GDRqc0oS5c+dqZGnjwujNZgOaRkYyaMpP+TlGQFZRdeNf7LlXIu68rij8bhOOfrNWdgKhtc9iyUiSeWC56XOHDh2qFcu///3vKCkpETbNeXN9ZyljqmKu/GQEZNbqf8HHL+78boht0RlHv/kIhd/9tKD0dBWfxTImJgZr1641K2GZOF2nYZ27dDoYyFgqP+UnIyCzVv9znl/CRdch5uwOKNi1Asd+2CY7gdDaZ7GkOPbr1w+7du3Cueeei6ioqFOa8P7775/0KYmwjY6Za2QpQ6mDgfKTEZBZq/8FH7/ENjehQdM2yNvxIYoPfikDILT2WSx5vpEjR56SiMC1HePHj9dpWBsXRgcDG9BcTJSf8pMRkFmr/znPL+nSXyA67Xzkfj4PJVl7ZCcQWtsSy0AmUk9KSkJ+fn6N6fNiY2ONOLsmRqiOi0aWMm/RwUD5yQjIrNX/go9f8mW/QlSjFsjZNhulR/bJAAitbYklV8FW967Sagu/v6xNuFzbPWXKFLRt2xaFhT9t7Pn888/jm2++wYwZM4xI8j3pokWLMGbMGPBYSxy5PRj/Pn/+fPTv399sD1ZTUbGUeYsOVspPRkBmrf4XfPwaXXEPIhOa4sjmt1GWf3q3frQllq6X7Prrr0enTp1Ouoq+TsNSXF944QWzwrasrMzU9dJLL4FJ2gcMGIBWrVph1qxZGDZsmFmJ26dPH8ybNw8TJkxAy5Ytwf01KaS1FRXL2gjV/HcdrJSfjIDMWv0v+PilXvUgwmNTcHjDG6gozJYBEFqLxJLfUzZv3vyUJvialIAiGR4ebt6D7tmzB4888ghmzpyJDRs2YNy4cSaKZQKEgQMHgkJcWlpqhJRJ3OfMmYO+fft69V2niqXMW3SwUn4yAjJr9b/g45fW9RGERccj66OXcbykQAZAaG1bLClu/HykqKgIb775JsrLy0805d133/VKvCwDHs89Mg8ePIjXX3/dCCAj1iVLlmDixInmMEssd+zYYSLLpUuXYsSIEcjIyEDr1q3NNC7rYMTJMmjQIPPjXsaOHStEpuZKQAkoASUQCAIb85ujoioUVybsR3hIZSBOWeM5Zs+efcrfQzp27FhVW8sYEe7duxf33XdfbYd6/DujRmYBsjaL5nQso9UjR46Yd5jDhw83USejzM6dO59Y6JOYmGiiz+nTp5tE7pMnTzardLt37+5xMZBGlrYvkzHUJ3vlJyMgs1b/CzJ+IaFo3GsYQkLDkLH8BaDq9Iql7ciSl43RXXJyMrKysk7akuvBBx88sVintsvL942MBhkF7t69G/xGkxElI1Y2rlevXuZdJb/t7Nat24nqmKx94cKFaNasGbp27YqhQ4di8eLF5njrvaf7uVUsa7saNf9dByvlJyMgs1b/Cy5+oeHRSO/5OKoqjyNzxXhZ5x2wFomlU4nUKXzMBsRy4MAB886SgkcR5btJvsscPXq02eWEpUWLFuZdJhsfHR0NhsapqalmVSw3pPZUVCxlHqODlfKTEZBZq/8FF7+w6DikdR2MyvJiHFo9WdZ5B6xFYhkZGVntJyR28sJyWpXTrdnZJ694atOmjZnqda0zLS3NdJ0RrS9FxdIXWqceq4OV8pMRkFmr/wUXv/CGjZB69QAcL85H1rpXZJ13wFokltOmTTORn3vhO0Trm0kH2uhYFSqWMpQ6WCk/GQGZtfpfcPGLSGiClCvuRfnRLGRvfFPWeQesRWLp1DSsA/3wqgoVS68weTxIByvlJyMgs1b/Cy5+UcnnILnj7SjL/RFHPnlH1nkHrEViefbZZyMsLMw0Iz4+Hn/7298QFxeHnj17+pTBx4F+eFWFiqVXmFQsZZiUn/LzEwFZtWfawwZzwjI3bGn2XuR8OlfWeQesRWLpfv6nn37arFrlytZt207vdirVsVGxlHnMmXazyXrrvLXykzFVfsHFr0GTNkhsexOKM79C3hcfyDrvgLVILLmvJRf5WIULdFiYUCAnJ8eB5jlbhYqljKcOVspPRkBmrf4XXPy4jyX3syz68TPkf7VM1nkHrEViuXLlSvPpBgsTmhcUFJjvJF955fSvXNLI0gHvcKtCBysZU+Wn/GQEZNZnmv/FtuiMuPO7oXDfZhz9eo2s8w5Yi8TSgfMHtAqNLGW4z7SbTdZb562Vn4yp8gsufnHndUXsuVfi6N51KPx2g6zzDlirWDoA0akqdDCQkVR+yk9GQGat/ucsv/gLr0XD5pehYPdKHNu/VVa5A9Yqlg5AdKoKvdlkJJWf8pMRkFmr/znLL7HNjWjQtC3ydy5G0YEvZJU7YO2zWHIRD7fKWr58OXr06IGPP/4YO3fudKAp/q9Cp2FljHUwUH4yAjJr9b/g4pfU7lZEp1+I3O3zUXJot6zzDlj7LJbM2LNmzRrs27fP7A7C/SfdPxPhDiCekpk70GbbVahY2kZnDHWwUn4yAjJr9b/g4pd82W2IanQucj59D6XZ38k674C1z2LJc65bt+7EKtjq2uDr5s8O9MOrKlQsvcLk8SAdrJSfjIDMWv0vuPg1uvxuRCaehSNb3kFZ3o+yzjtgbUssuS3XddddZ/aaZFTJ7y1dy3vvvaeRpY2Lo4OBDWguJspP+ckIyKzV/5zll3LVA4iITTV5YZkf9nQXW2JpNfqCCy5Abm4ubrjhBpPm7p133kFeXt7p7pPH82tkKbs0OhgoPxkBmbX6X3DxS7tmEMIaJCBr3XQcLz79uiISyzvvvBNPPvmk2W/SKqtWrcJvf/tb2VX1k7WKpQysDlbKT0ZAZq3+F1z80rs/htDIGBxa8w9UlhXJOu+AtW2xDA0NNdOvERERWLp0KYqLi3HjjTeiQYMG6Nu3L3744QcHmudsFSqWMp46WCk/GQGZtfpfcPFrfO3/ISQ0HJkrJqCqskLWeQesbYsldxz5z3/+A6a8GzFihGlK//798cQTT+DPf/4zFixY4EDznK1CxVLGUwcr5ScjILNW/wsefiHhkWjc89dAVRUylj8v67hD1rbFkpHlpk2bTET5/PPPm82eOSWbnp6O22+/Hd9++61DTXSuGhVLGUsdrJSfjIDMWv0vePiFRcUirdsQVJaX4NDql2Qdd8jatljy/OPGjTN7V7qWzz//HA899JBDzXO2GhVLGU8drJSfjIDMWv0vePiFN0xG6tUP4XhJAbI+elnWcYesRWLJNrRt2xb3338/YmNjzWpY989IHGqnI9WoWMow6mCl/GQEZNbqf8HDLyK+MVI634eKwsM4vGGGrOMOWYvF0qF2BKQaFUsZZh2slJ+MgMxa/S94+EUmN0ejjnegLO8Ajmz5l6zjDlmrWDoE0olqdDCQUVR+yk9GQGat/uccv+jU85DUvq9Jc8d0d3WhqFjWhavw3zbozSa7GMpP+ckIyKzV/5zj16DJxUhs+zMUZ+5C3hd148sKkVimpKRg2rRp4GckW7duRVRUFN5++23zOUldLDoNK7sqOhgoPxkBmbX6X/Dwi2nWHgmte6PowHbk71wi67hD1iKxXLRoEdLS0kxTtm/fjhYtWpiFPlwhe+zYMYea6Fw1KpYyljpYKT8ZAZm1+l/w8GvY4grEn98dx77/BAV7Vsk67pC1bbGMjIzE+vXrQQFi1p7jx49j8eLFJkEBf+pidKliKfMaHayUn4yAzFr9L3j4xbW6BrEtr0Lht+txdO/Hso47ZG1bLLkJ9MaNG7F//36Ul5ejoqLCJFW/8sorcfPNNyMzM9OhJjpXjYqljKUOVspPRkBmrf4XPPziL+yFhs07omDPahz7fous4w5Z2xZLnv+tt97CRRdddFJTDh48iFtvvdWh5jlbjYqljKcOVspPRkBmrf4XPPwSLu6DmLMuQf5XS1H04+eyjjtkLRJLprzr06cPbrvtNpPmjinunnnmmTq7TZeKpcxrdLBSfjICMmv1v+Dhl9TuFkSnX4S8Lz5AceZXso47ZC0Sy6FDh5r3le6FU7BMss58sXWpqFjKroYOVspPRkBmrf4XPPySO/wSUSktkfPZXJQe3ivruEPWIrHcvHkzGF1WV0pKSkzUWZcEU8VS5jU6WCk/GQGZtfpf8PBr1OkuRCY1w5FP3kVZbt3Y7lEkltyGi5+OfPDBB2aRD99VcqHPnj170L59e0yaNAkzZ870+gpzhW3Tpk2xb9++EzZJSUnIz89HZWWlx3r4uQrFmeeuqahYen0pqj1QByvlJyMgs1b/Cx5+KVfej4i4NGRvmonygkOyjjtkbVssrU9Hdu/ejXvuucc056WXXsJVV12Fa6+9FsuXLzciyr0tvS1cMESx7NWrF5o3b44ZM2YYkYyJiQG/6RwzZgymTJlivuWkOA4ePNj8ff78+WYvzby8PBVLb2HbOE4HKxvQXEyUn/KTEZBZn0n+l9plIMJjEnH441dRUZQr67hD1rbFkuffsmULQkJC8OGHH6KsrMxEllVVVfjHP/6Bxx9/HM8++6x5d+lNGTJkCH75y1+aaV2KJYW3YcOGGDBgAFq1aoVZs2Zh2LBhGDVqlJnenTdvHiZMmICWLVsagaWQ1lY0sqyNUM1/P5NuNllP/WOt/GRclV/w8Evv/ihCIxvi0NopqCytGwluRGJJ8WJUScFkoVAy3V1CQgKuu+463HjjjV69s+zQoQNeeOEFkzqP0SLFcu7cudiwYYPZM5MCyvejAwcOxPjx41FaWmqEtHfv3pgzZw769u1rxLq2omJZGyEVSxkh5af8ZvsTgajuM+lho3GvYQgJi0DmyomoOl772C4C46WxSCx5DiYnoChyapRRJKdHGe1xRWxRUVGtzaAdp1iffvpppKammoiUYsnfLVmyBBMnTjR1WGK5Y8cOE1kuXbrUZArKyMhA69atzb6aU6dONREny6BBg8yPexk7dmytbdIDlIASUAJK4PQQOF4Vgg3555iTX5P4v/Urp6c1J5919uxTH4ZCOnbsWFVb4+Lj4/HUU0+ZnLBWady4sfnuMicnpzZz83duHE2BLC4uNsIbERFhFvhkZ2ebqHT48OHm94wyO3fufGKhT2Jiolk8NH36dHAKd/LkyRg5ciS6d+/ucTGQRpZeXRKPB51JT6aynvrHWvnJuCq/4ODH6VdOw1ZVlCFz1U/BUl0oosiS0V1ycvIp/bj++uu9FkuK3gUXXGDquPrqq9GvXz8zFUvRY+MYZXK6l7/v1q3biXNxpe3ChQvRrFkzdO3aFfzmk7lpebynKVkVS5nL6WCl/GQEZNbqf8HBLzwmCaldHsbxkqPI+miarNMOWtsWS2s1LJOp8zORzz77zHy6ccUVVxhRq+lTD0/tZ07Z3/zmN0bwGLVySpXvJvlOdPTo0eCnKiyMZPkuk42Pjo4GQ2NO4XJV7HPPPecRj4qlzHN0sFJ+MgIya/W/4OAXEZeOlCv7o+LYERxe/7qs0w5a2xZLa9ENPx3hght+6vGnP/0JL774Ip544gmzI4kTpU2bNti7d695F2oVa1uwrKwsn06hYukTrlMO1sFK+ckIyKzV/4KDX2TS2WjU6U6U5x9E9ua3ZZ120Nq2WLINjOT42QY/67jjjjtONMuXaVgH+1JrVSqWtSKq8QAdrJSfjIDMWv0vOPhFpbRCcod+KD2yDznb6s7qYpFYMrvOnXfeiXfeeccs0mEU+P777+Pdd9+VXVU/WatYysDqYKX8ZARk1up/wcGvQePWSLzkZpQc2o3c7fNlnXbQ2pZY8j1iXFycEUZGl6+//tO8cpcuXcyK1AceeABffvmlg810pioVSxlHHayUn4yAzFr9Lzj4xZx1KRIuvh5FB79A/o7Fsk47aG1LLLkSlStXPRUmJKgt9ZyDffC6KhVLr1FVe6AOVspPRkBmrf4XHPwantMJ8Rf0xLH9W1Gwe6Ws0w5a2xLLu+++G3wvyWnX3NxccMNnq1CQ+M1jXSwqlrKrooOV8pMRkFmr/wUHv9iWVyOuVRcUfrcBR79ZJ+u0g9a2xNI6P7+HXLduXZ2ccq2OkYqlzHN0sFJ+MgIya/W/4OAXf0EPNDznchz9ei0K922SddpBa5FY3n777XjooYfAhT6uhenovM3g42Bfaq1KxbJWRDUeoIOV8pMRkFmr/wUHv4TW1yOm2aXI37UcRT98Kuu0g9YiseS3lExOwG8gXZMQcPcRfWfp+1XSwcB3Zq4Wyk/5yQjIrNX/nOHHlbBcEZv35SIUZ+yQVeqgtW2xZL7WjRs3Ytu2bdUmLHewjY5VpZGlDKUOBspPRkBmrf4XHPyS2/dDVGor5H7+PkqyvpZ12kFr22LJNnCXkfT0dLNjiGs2nV27dtlKd+dgv6qtSsVSRlgHK+UnIyCzVv8LDn7M3sMsPjlb/43SnO9lnXbQWiSW3DaLae/cy0033XSSeDrYXlFVKpYifCYXb3Vb1Mhqdc5a2ydjqfyUn4yAzNryv5TO/RERn47szW+hPD9DVqmD1iKxfP75502ic/fCbbWOHasbu1u7tk3FUuY5OpgqPxkBmbX6X3DwS+3yEMJjkk0SdSZTrytFJJbsxKWXXoobbrgBX331ldnwecuWLXWlb6e0Q8VSdml0sFJ+MgIya/W/4OCX1m0IwqJizfZc3KarrhSRWPI7y4cfftj0Zfv27WjVqhUOHDgAJi2oi0XFUnZVdLBSfjICMmv1v+Dg17jnrxESHolDqyahsqJU1mkHrUViuWnTJpSXl5vNlvft22cW9XBvS4rlnj17HGymM1WpWMo46mCl/GQEZNbqf/Wf33tz/oPG1/6f6WjG8ueBqipZpx20ti2WfFe5Zs0aLF26FGeddRaOHz9udhvh5svcwHnt2rUONtOZqlQsZRx1sFJ+MgIya/W/+s9vzryFSO/+GKqOlyNz5YuyDjtsbVss2Q4mJeD3lhUVFSaqjIiIMP+96qqrHG6mM9WpWMo46mCl/GQEZNbqf/Wf39wPliHtmoGoLC3EobVTZR122FoklkymzpWvycnJJ5r12muvYerUutVJq3EqljLv0cFK+ckIyKzV/+o/v/cXr0HKlfej4lgODq9/TdZhh61FYsm2XHPNNaZJjRo1QlFREZYtW+ZwE52rTsVSxlIHK+UnIyCzVv+r//zmLVuPRpffjfKCTGRv+qesww5bi8Ry7NixuPbaazFlyhSzoOfFF1/Ejh07cP/99zvcTGeqU7GUcdTBSvnJCMis1f/qP7/5qz5BcofbTOYeZvCpS0Uklhs2bDCrYR988EHk5+fj9ddfR9OmTXU1rM0rrIOBTXD/NVN+yk9GQGat/ifnt2DtdiS1u9XkhGVu2LpUbIsldxvhAp/vvvvOpEFj+d3vfmf+/Ze//AXz5s2rS/00bdHIUnZJdDBQfjICMmv1v/rPb+H6XUi4uI/ZbYS7jtSlYlss2Ql+OsJPSI4cOYLS0lI0adLErIbt1auXpruzcZV1MLABzcVE+Sk/GQGZtfqfnN+iTd8i/sJeZh9L7mdZl4pILJmAgNEkM/cwoTo3fOZULL+3rItFI0vZVdHBQPnJCMis1f/qP78Pt/6IuFbXoHDfJhz9um59q29bLDkNywQETHP31ltvISEhAbm5ubKr6WdrFUsZYB2slJ+MgMxa/a/+81v8WRZiW1yBo998hMLvNso67LC1bbFkO/jOMiQkBF27djWJCep6UbGUXSEdrJSfjIDMWv2v/vNb+mUeYpq1R8HuFTi2f5usww5bi8RywYIF5j0lt+M6dOgQqv6bx++hhx5CYWGhw02VV6diKWOog5XykxGQWav/1X9+y74qQoMmFyNvx4coPvilrMMOW4vEUjd/dvZq6GAg46n8lJ+MgMxa/U/Ob/nXFYhOPQ+52+eh5FDd2oxDJJZ8T8mFPe6lzFN5hgAAIABJREFUrr671MhS7syzZ8+WVeJHax2sZHCVn/KTEZBZ0/9WfhuKyOTmyNk2G6VH9skqdNhaJJYpKSmYNm0azj77bGzduhVRUVF4++23sXLlSoeb6Ux1KpYyjjqYKj8ZAZm1+l/957dqfzQi4hvjyJZ/oSzvgKzDDluLxHLRokVIS0szTeKq2BYtWiA2NhY9e/bU7yxtXCgdDGxAczFRfspPRkBmrf4n57f6QBzCGzbC4Q0zUFF4WFahw9a2xdLK4MNorUGDBmY/y8WLF2PEiBHmx9foMiYmxmzxxbR5riUpKcn8jskOPBUKdElJSa0rcjWylHmPDgbKT0ZAZq3+V//5rclshLDoOGStewXHi0/WAlnv5da2xZL7WG7cuBH79+83+WH56QjfVV555ZW4+eabkZmZ6XXrZsyYYRIbMKlBdHQ07rzzTsTFxYG/p0hSSBnFjhkzxiRtt8Rx8ODB5u/z589H//79kZeXV+M5VSy9viTVHqiDlfKTEZBZq//Vf35rs5sgNDwKh1ZPRmV5sazDDlvbFku2g8kILrroopOadPDgQdx6661eN5ORI8WO32qyrF27Fu+88w4uvvhik0pvwIABRkhnzZqFYcOGYdSoUejTp4/JPTthwgS0bNnSJG+nkNZWVCxrI1Tz33WwUn4yAjJr9b/6ze+2X92Oj3ObAyEhyFwxHlWVx2UddthaJJZcCUvhuu2225Ceno5vv/0WzzzzTK0RXnV96N69OwYOHIgLLrgAv/jFLzB58mRwV5Nx48aZFbf8TIV/Hz9+vMlDSyHt3bs35syZg759+6KsrKxWNCqWtSKq8QAdrJSfjIDMWv2vfvPre9sd2JjfHFWVFchcMUHWWT9Y2xJLLup59NFHTTTI6dYXXngB27bJsi0w+fqQIUPMIiFGjPfeey+WLFmCiRMnmm5bYsn9MinQS5cuNe9GMzIy0Lp1a7Rt2xZTp049sdvJoEGDwB/3wj04tSgBJaAElEDdIlBaGY4tBc0QEXIcnRN+qFuN+29rqvt0LqRjx45Vnlr7xhtv4JJLLjnx5+Li4hPTqL72kELH1bN8F8ny2muvIT4+3uxkwixAw4cPB9+PMsrs3LnziYU+iYmJmDlzJqZPn25ElpHoyJEjwQjV02IgjSx9vTonH69P9spPRkBmrf5Xv/n9rN892Ha0KSqKcnH441dlnfWDta3Ikgt7ioqKTPT37LPPGuG8/fbbzTSsr4XvG99//33cd999+Prrr01k+Omnn5qIkY1jxMl3lf369UO3bt1OVD9p0iQsXLgQzZo1M0I9dOhQsxqXx3uaklWx9PXqqFjKiCk/5eckAVlddf1ho0/fe/F5YROUFxxC9qaZss76wdqWWFJ0+MPVqHfddReefPJJPP3002Zq1E5hYoOOHTuaiJCfiTzwwAMmqqRw8t0kk7WPHj0azEXLwqlavstk47l6lqFxamqqWSjEnVA8FRVLO1fnfzZ1/WbT9un1lRGQWav/yfj17tsfXxamoyx3P458MktWmR+sbYtldna22fyZC3IYWVKIvv/+e9NEvnPkd4++FE698pMQrqZ1LW3atMHevXtPqs9KhJCVleXLKUwbO3Xq5JNNIA/Wm01GW/kpPxkBmbX6n4xfz1/cj13H0lBy+BvkfvYfWWV+sLYtljW15aabboKvQuaHvp1SpYqljLIOBspPRkBmrf5Xv/l1+/mD+LooBcWZXyHviw9knfWDtS2x5DvEsLAwj8156aWXvPqUww/9qbFKFUsZcR2slJ+MgMxa/a9+8+ty60P4tjgZRT9+jvyv7L3SkxGq2dqWWPqzQf6sW8VSRlcHK+UnIyCzVv+r3/yuvOVhfF+ShGPfb0HBntWyzvrBWsXSD1DtVqmDgV1yP9kpP+UnIyCzVv+T8bv8lkH4sSQBR/d+jMJv18sq84O1iqUfoNqtUm82u+T+v70zgbKiuN74nX1nFpgBRjYZZREQBRTZEURRcQGNRqPEkEBAEYkSIgmJSgg5hAiCCERMgiTRIIKyChhBDYoiRtSAokIAWQeGmWGflf/5KnnzHx68mdd9u9/GV+dwgJm+t6t+dbu+ruruWxRLHTnyIz8nCOh8dBgwXPaXpMnRr9bJiV2bdM5csKZYugDVrkuKpV1yHOx15MiP/JwgoPPR/uYRkl+aKsVbV8vJvZ/pnLlgTbF0AapdlxRLu+Q42OvIkR/5OUFA56PtTQ9JQVmyFH2+TE4d+FLnzAVriqULUO26pFjaJcfBXkeO/MjPCQI6H61vHClF5Uly5JNFUnLYejY43dlrt6ZY1s4oYEdQLHWoyY/8dAR01ow/Hb8W/UfJsYoEKdj0spQW7tE5c8GaYukCVLsuebHZJceZkY4c+ZGfEwR0PvL6j5aTFXFy+IMXpeyYtexsujP7Z02x9I9TQI6iWOowkx/56QjorBl/On7NbviJYJuu/PfmSsXJIp0zF6wpli5AteuSF5tdcpwZ6ciRH/k5QUDno/H1j0n5mWg5+M5zUll6UufMBWuKpQtQ7bqkWNolx8FeR478yM8JAgofUVGSe91jckai5MDaZ+RMRZnCmTumFEt3uNrySrG0ha3KiPzIT0dAZ834s88vOjZB6l87Ss5UVsiBt6bad+SiJcXSRbhWXfNis0rs7OPJj/x0BHTWjD/7/GIS0ySnx3CpLDslB9+ead+Ri5YUSxfhWnXNi80qMYqljhj5kZ+TBOz7ik2pK9ldh0jFqWLJX/+8fUcuWlIsXYRr1TXF0ioxDvY6YuRHfk4SsO8rLr2h1Lv6PvPJCD4dCcVCsQyhXqFY6jqD/MhPR0Bnzfizzy8hq6lkdbzLJCNAUoJQLBTLEOoVXmy6ziA/8tMR0Fkz/uzzS8y5VDLb327S3CHdXSgWimUI9QovNl1nkB/56QjorBl/9vklNWwjGW1vMgnUkUg9FAvFMoR6hRebrjPIj/x0BHTWjD/7/JIbXynpra4zW3Nhi65QLBTLEOoVXmy6ziA/8tMR0Fkz/uzzS23WWdIu7Skndm+So9vW2XfkoiXF0kW4Vl3zYrNK7OzjyY/8dAR01ow/+/zSLukhqRdfI8d3vC/Htr9n35GLlhRLF+Fadc2LzSoxiqWOGPmRn5ME7Puq07KvpDTpIEe/fltO7PzIviMXLSmWLsK16ppiaZUYB3sdMfIjPycJ2PeV3qa/JOe2k+Iv3pSTezbbd+SiJcXSRbhWXVMsrRLjYK8jRn7k5yQB+74yL79VEuu3lKJ/r5BT+7fad+SiJcXSRbhWXVMsrRLjYK8jRn7k5yQB+76yOtwpCXUvlsLNr8npQ9/Yd+SiJcXSRbhWXVMsrRLjYK8jRn7k5yQB+77qXnWvxGdcJAUfL5DSI7vtO3LRkmLpIlyrrimWVolxsNcRIz/yc5KAfV/ZXR6Q2NRsOfzhX6Ts6AH7jly0pFi6CNeqa4qlVWIc7HXEyI/8nCRg31dO92ESk5Quh97/o5SfOGLfkYuWFEsX4Vp1TbG0SoyDvY4Y+ZGfkwTs+6rfe6RExyVJ/ruzpaLkuH1HLlqGjFjGx8dLVlaWHDhw9hQ8MzNTiouLpbKy0ieG1NRUOX36tJSXl9eIatOmTdKpUycXcepcUyzJT0dAZ834Iz8dAfvWDfo+KlHRMXJg3XQ5U15q35GLliEhllOnTpUuXbrIoUOHJCEhQZ588knZu3evzJs3z4hkcnKyrFy5UiZOnCizZs0SjzgOHz7c/H7p0qUyePBgKSoqolgGIVhcPKUl1xzsLeE652DyIz8dAXvWUTHx0qDPIxIlIvvenGLPSQCsgi6WEMJ3331X+vXrJ4WFhUYMMcOEcKakpMiQIUMkLy9PFixYIKNHj5bx48dL//79ZcmSJTJt2jRp3ry55ObmGiGtrXBmWRuhmn/PwZT8dAR01oy/yOQXk5AqOT1HSGxUpXy75mldI120DrpYom0Qu3379kl6erosW7bMzBS7du0qGzZskClTpkh0dLRs3LhRhg4dKpiFlpSUGCGFwC5atEgGDhwopaW1T90plrpI4mBFfjoCOmvGX2Tyi03OkuxuP5SE6HLZuXqarpEuWoeEWKJ9N9xwgzzxxBNSUFAg99xzj7zyyiuyevVqmT59umm+Ryy3bNliZpZr1qyRsWPHyv79+6V169bStm1bmT17tplxogwbNsz88S6TJ092ESddkwAJkAAJWCFwvCJeNh/LlZSYUrkybZ8V04Afu3DhwnPOGdWxY8czgarJfffdJ6NGjTJLrU8//d9p+Jw5c+T48eMyZswYiY2NNbPMzp07V73ok5GRIfPnz5e5c+fKiBEjZObMmTJu3Djp1auXz5eBOLPU9Sjv7MlPR0BnzfiLTH7xmU2kbqe7pU5siWx7Y4aukS5aB31miSXWDz/8UJ566ilZvnx5VVMhgKhcnz59zLPKQYMGSc+ePat+P2PGDFmxYoU0atRIevToISNHjpRVq1aZ430tyVIsdZHEwYr8dAR01oy/yOSXmH2JZF4xUDJjT8nWN2bqGumiddDFEp9yYBZZveBN2Pvvv98sqeLZZFRUlEyYMME8z0Rp1qyZeZaJyicmJgqmxtnZ2eZZ56RJk3zioljqIomDFfnpCOisGX+RyS+pwWWS0e5mqRd3Uj5f+ZyukS5aB10sa2tbmzZtZPv27eY7Sk/Jyckx/8zPz6/N/KzfUywt4TrnYA5W5KcjoLNm/EUmv+RG7SW99fVSP/64bF4xW9dIF61DXiydbDvFUkeTgxX56QjorBl/kckvpelVUqdFb8lNOCofL/+DrpEuWlMsXYRr1TUHA6vEzj6e/MhPR0Bnzfizxy8tr5ukNu8qjROLZeOy5+05CYAVxTIAkP09BS82f0md/zjyIz8dAZ01488evzotrpWUpp2kWWKhbFj2gj0nAbCiWAYAsr+n4MXmLymKpY4U+ZGfGwTs+Uy/7AZJvuhyyUsqkPVL/2TPSQCsKJYBgOzvKSiW/pLiYK8jRX7k5wYBez4z2t0iSQ1aSYvkw/LOkj/bcxIAK4plACD7ewqKpb+kONjrSJEf+blBwJ7PrCvvkIR6zaV1Sr6sff1Fe04CYEWxDABkf09BsfSXFAd7HSnyIz83CNjzWbfTPRKf2Ujaph6UN1+bb89JAKwolgGA7O8pKJb+kuJgryNFfuTnBgF7Putd832JS8uR9mn7ZdXiv9pzEgArimUAIPt7Coqlv6Q42OtIkR/5uUHAns+cbkMlJjlDOqTtlRWLX7LnJABWFMsAQPb3FBRLf0lxsNeRIj/yc4OAPZ/1ez0k0fHJclWdPbJ00cv2nATAimIZAMj+noJi6S8pDvY6UuRHfm4QsOezQZ/REhUTJ9ek75bXXl1gz0kArCiWAYDs7ykolv6S4mCvI0V+5OcGAes+IZIQS5RuGbvk1YWvWHcSIAuKZYBA+3MaiqU/lHwfQ37kpyOgs2b8WecXHZ8i9Xs9KGfKS6VHvX1mB6lQLRTLEOoZXmy6ziA/8tMR0Fkz/qzzi03OkOxuQ6Xi9DHp1aCAYmkdoTsW3HVEx5WDAfnpCOisGX+Rxw+fjODTkfITBdL7omMUS10XO2dNsdSx5GBFfjoCOmvGX+TxQzICJCUoK94v1zYtoVjqutg5a4qljiUHK/LTEdBZM/4ijx/S3CHdXcmRXdK3+RmKpa6LnbOmWOpYcrAiPx0BnTXjL/L4IYE6Eqmfzv9armsRR7HUdbFz1hRLHUsOVuSnI6CzZvxFHj9szYUtuk7t3yL9WqdQLHVd7Jw1xVLHkoMV+ekI6KwZf5HHD5s+Y/Pnk99uluvbZVAsdV3snDXFUseSgxX56QjorBl/kccvtXlXScvrJsd3bpT+V+RQLHVd7Jw1xVLHkoMV+ekI6KwZf5HHr06L3pLS9Co59s16ubFTI4qlrouds6ZY6lhysCI/HQGdNeMv8vilt75ekhu1l6Pb1spNnZtTLHVd7Jw1xVLHkoMV+ekI6KwZf5HHL6PdAElq0FqKt6ySm7u1oljqutg5a4qljiUHK/LTEdBZM/4ij1/mFYMkMTtPCj9bKrf0vJxiqeti56wpljqWHKzIT0dAZ834izx+dTt9V+IzG8uRf70qt/bpRLHUdbFz1hRLHUsOVuSnI6CzZvxFHr96nQdLXJ36UvDRS3Jbv64US10XO2dNsdSx5GBFfjoCOmvGX+Txy+72I4lNzpTDH8yT2/v3pljqutg5a4qljiUHK/LTEdBZM/4ij1/9ng9KdEKK5K9/XgYNuJ5iqeti56wpljqWHKzIT0dAZ834izx+Dfo8IlEx8XLwnZlyx20DKJa6LnbOmmKpY8nBivx0BHTWjL/I4hcVHSMN+j5qGnXgraly5x2DKJb+dnFWVpYUFRVJZWVllUlmZqYUFxef9TNvf6mpqXL69GkpLy+v8VQUS3974vzHcbAiPx0BnTXjL7L4RcclSf3eI+VMRZkcWPuMhGv/RnXs2PGMrmv8t27evLl0795dHn74Ybn77rtlx44d0qRJE5k3b54RyeTkZFm5cqVMnDhRZs2aJR5xHD58uPn90qVLZfDgwUZoayoUS//75HxHhmsw61rtnDX56ViSX2Txi0lKl5zuw6Sy5IQcfHcWxdKf7h01apR07txZWrZsKXfddZcRy2effVZSUlJkyJAhkpeXJwsWLJDRo0fL+PHjpX///rJkyRKZNm2aQGhzc3ONkNZWKJa1Ear59xysyE9HQGfN+IssfnGp2VKvywNSfrJQDr33AsXSSvdCzDxiuXjxYtmwYYNMmTJFoqOjZePGjTJ06FCZOnWqlJSUGCHt16+fLFq0SAYOHCilpaW1nopiWSuiGg/gYEV+OgI6a8ZfZPGLz7hI6l51r5QdPSiHP5xPsbTSvdXFEsuuq1evlunTpxsXHrHcsmWLmVmuWbNGxo4dK/v375fWrVtL27ZtZfbs2WbGiTJs2DDzx7tMnjzZSpV4LAmQAAmQgAsECsuSZMuJ+pIee1rapR5w4QzOu1y4cOE5TgP6zNJz9upiOWfOHDl+/LiMGTNGYmNjzSwTS7Wel38yMjJk/vz5MnfuXBkxYoTMnDlTxo0bJ7169fL5MhBnlrrg4Z09+ekI6KwZf5HFL7F+S8m8/FYpObRdjmxezJmlle6tLpYQQFwcffr0Mc8qBw0aJD179qxyN2PGDFmxYoU0atRIevToISNHjpRVq1aZ430tyVIsrfTGucdysCI/HQGdNeMvsvgl57aT9Db95dSBL6To8+UUSyvdCzG78847ZefOnVKnTh2zpIpnk1FRUTJhwgRZtmyZcdesWTPzLBMXT2Jiovk2Jzs727wVO2nSJJ+npFha6Q2KpY4W+ZGf0wR0/kLtZiOlSUep07KPnNzzqRR/sYZiqetekTZt2sj27dvNd5SekpOTY/6Zn59vyT3F0hKucw4OtYvNu4KsH/tXR0Bnzfizxi/14i6Sdkl3ObHrIzn61dsUS2v43D2aYqnjy8GA/HQEdNaMv8jil3ZpL0ltdrUc2/6eHN/xPsVS173OWlMsdTw5WJGfjoDOmvEXWfzSW/WT5MZXyNGv1smJXZsolrruddaaYqnjycGK/HQEdNaMv8jil9H2ZklqeJkUb10jJ/d+SrHUda+z1hRLHU8OVuSnI6CzZvxFFr/M9gMlMecS8yYs3ogN1/4NyneWulCo3ZpiWTujmo4I12DWtdo5a/LTsSS/yOKX1fEuSchqKkc+WSwlh7dTLHXd66w1xVLHk4MV+ekI6KwZf5HFr97V90lcekMp2PR3KS38lmKp615nrSmWOp4crMhPR0BnzfiLLH7ZXYdIbEpdOfzBfCk7dpBiqeteZ60pljqeHKzIT0dAZ834iyx+OT2GS0ximtlxBDuPhGv/8pmlLi5tWYdrsNhqrAtG5KeDSn7kpyNgzbrBtaMkKjZBDr4zSypLT1AsreFz92jOLHV8OZiSn46AzprxF0H8oqKlYd9HRaKi5MDaZ+RMRRnFUte9zlpTLHU8OViRn46AzprxFzn8ouMSpX7vh+VMZbkceGuaaVi49i+XYXVxacs6XIPFVmNdMCI/HVTyIz8dAf+tYxLrSE6PH0tl6Uk5+M5zFEv/0QXmSM4sdZw5mJKfjoDOmvEXOfxiU+tJdpcfSMWpIslfP5diqeta560pljqmHKzIT0dAZ834ixx+8em5Uvfq70nZsXw5/MGLFEtd1zpvTbHUMeVgRX46Ajprxl/k8Euo20yyOnxHSov2SsFHL1EsdV3rvDXFUseUgxX56QjorBl/kcMvMaeFZLa/TUoO/0eOfPIqxVLXtc5bUyx1TDlYkZ+OgM6a8Rc5/JJy20pGmxvl9MFtUvjZUoqlrmudt6ZY6physCI/HQGdNeMvcvilNO4gdVr1lZN7P5firasolrqudd6aYqljysGK/HQEdNaMv8jhl3rxNZJ2SQ85sftjObptLcVS17XOW1MsdUw5WJGfjoDOmvEXGfxiktIls/3tEpeWI8d3bJBj29dTLHVd67w1xVLHlIMV+ekI6KwZf+HPD5l7sOlzfGYjKTm8Qwo/fV3OVFZQLHVd67w1xVLHlIMV+ekI6KwZf2HOLypKstoPlITsPCkt2iOFm1+XyrJTVY0K1/5lujtdXNqyDtdgsdVYF4zITweV/MhPR6Bm64x2AySpQWuTiAAzyopTxWcZhGv8USzdjBofvsM1WIKA6rynJD9dT5Af+ekI+LZOb91PkhtdYQSy8NPXpOzYoXMODtf4o1i6FTU1+A3XYAkCKoqlC9AZfzqo5Hd+fmmX9pLUZlebJdfCza+ZrD3nK+HKj2Kpu25sWYdrsNhqrAtG5KeDSn7kpyNwrrXnExG8xIMZJTL2+CrhGn8US6ejxg9/4RosfjQtIIeQnw4z+ZGfjsDZ1smNr5T0VteZHxZ+tkxOH/yyRvfhGn8USyejxk9f4RosfjbP9cPIT4eY/MhPR+D/rZMaXiYZbW82PyjeulpO7v2sVtfhGn8Uy1q71vkDwjVYnCdhzyP52ePmsSI/8tMR+K91Qr08ybpykPn30W3r5MTuTX65Ddf4o1j61b3OHhSuweIsBfveyM8+O1iSH/npCIjEZzQyQhkVmyDHtr8nx3e877fLcI0/iqXfXezcgeEaLM4R0HkiP/LTEdBZX+jxF5uabYQyJrGOnNj1kRz96m1LQMOVX8iIZWZmphQXF0tlZaVP8KmpqXL69GkpLy+vsXOYwcdS7J5zcLgGs67VzlmTn44l+YUev/iMi0zqOswo8W/MKE/u+VSKv1hjubLh2r9BF8smTZrIvHnzjEgmJyfLypUrZeLEiTJr1izxiOPw4cPN75cuXSqDBw+WoqIiiqXlEPXfIFyD2f8Wunsk+en4kl/w+cUkpEpC3Yslvm4zI44xiWlnVerUgS+k6PPltioarv0bdLF89tlnJSUlRYYMGSJ5eXmyYMECGT16tIwfP1769+8vS5YskWnTpknz5s0lNzfXCGlthTPL2gjV/PtwDWZdq52zJj8dS/Jznl9UdKxExcZLVEycRMXES3RsnETFJkp0fJJExyX/7+8kiY5PltikdMFSa/VypqLM5HktLdxT9bfdWoZr/wZdLBcvXiwbNmyQKVOmSHR0tGzcuFGGDh0qU6dOlZKSEiOk/fr1k0WLFsnAgQOltLS01j6yK5YN+/20Vt88gARIgAQuBAJlRw9K6ZHdUlK4y/zt2TVE23aKpU2CWHZdvXq1TJ8+3XjwiOWWLVvMzHLNmjUyduxY2b9/v7Ru3Vratm0rs2fPNjNOlGHDhpk/1QtENiEhwXKNbhm3wLINDUiABEgg1AnEx8VIUnysJCbESlJ8nPk7JTFO6iTHS1pygqT972/P/1s0ypLkxLhQb5Yr9du9e7cMGvTfT2Kql6DPLOfMmSPHjx+XMWPGSGxsrJlldu7cuepFn4yMDJk/f77MnTtXRowYITNnzpRx48ZJr169fL4MZHdm6Qr58zhl/XSkyY/8dAR01oy/C5Nf0MUSAohpeZ8+fcyzSih6z549q3pjxowZsmLFCmnUqJH06NFDRo4cKatWrTLH+1qSZTBfmMGsa7Vz1ow/HUvyIz8dAZ21r/gLuljWqVPHLKni2WRUVJRMmDBBli1bZlrbrFkz8ywTYpqYmCgLFy6U7Oxs81bspEmTfBLhxeZOsOi8OmfN/tWxJD/y0xHQWYdr/AVdLD3Y27RpI9u3bzffUXpKTk6O+Wd+fr6l3gnXzrDUSBcPJj8dXPIjPx0BnTXjzx1+ISOWuuadbY0Xfp5//nknXTrqi/XT4SQ/8tMR0Fkz/i5MfhEplrqupDUJkAAJkAAJnE2AYsmIIAESIAESIIFaCFywYonnoVafhXqzDBUfbkZ548aN5dChQ2c9S3bzfHZ8t2jRQr766is7pq7bgN+3337r+nnsnqBevXpy+PBhu+au24U6PwAI9fgL5evXiTE0Pj5ekpKSTG5xu8UfHxecWHbq1EluvvlmueGGG6Rr16622DrhY8CAAfLwww8bEcKbwMhYhAQNoVIuv/xy803r0aNHTc5eZFB67rnnQqV6VfXwJKW4+uqra0zCH+iK33vvvaZ/CwoKBBfia6+9ZpJphEpBwo9f/epX5htn5GBGGslQir9Q5+fpx1CNv1C/fp0Y//Bdfu/eveWHP/yhnDlzRhAzVosVHxecWCIXLT5JqVu3bpVYIs0efg4RRPaf3/zmNyarkKfg05bbbrut6v/n82G1kzB4fvHFF/Lzn/9c8C0p7rC++93vWnXj2vFo87vvvitPP/204O4e37i+9NJLrp3PjuP27dubQb5hw4YSamK5bt06+dOf/iR/+ctf5K677pKf/OQn0qVLFzvNdMQGNz6oz7/+9S/jb+3atSbZBzYxGDVqlNx+++3m2+VgFe/6hTo/cAql+PPmF2rXr3f97Ix/GJ+xkQbiFSUrK8uM2/gGf+/evVViibFg8uTJ5iYfYyxuaDz9RvVhAAAMCUlEQVTf5Hs25fC8AOrLx/mugwtOLAEBgwIGWc/M0vP/xx9/XG699VYjDBjYPNuFffTRR3LVVVedxc/bh9VBBndBGECRxg+D/a9//Wvz/WioFAglZh34rrWwsFCefPJJef99/zd4dbsdmI3je1wkqcCgH2piieVN7I6D7eT+9re/mdklvhcOVkHaSNz4eG4CP/zwQ7N5AdJKXn/99eZ6AMNgFe/6hTq/UIs/b36hdv1618/O+HfTTTfJI488YlYFq5ff/va30rRp0yqxfOONN4xIYqXkiSeeEPzf810+rgGM6z/96dl5wL19XDBiiWWl+++//5z2YlA9ceLEOWKJux48d1i/fr2xgWDiDsQjkJjm//GPfzS/8+XD1yBz3333SVra2dvbvPXWWya9H/LcfvbZZ+YOFWn+Hn300YCPVb7q99e//tU8y8INBAILg8P58iW6XWFf9YN4f/nllyaJBb4rC5ZY+qofnqFipxwsXaenp5t4Ql8Hunjq973vfc/kXf76668F8QcBx4CF/1955ZWCtJNIMxno4qt+oc4v1OLPu39D7fr1rp+V8a9ly5ZmzMbfmF0idvF80rPSVV3okOQGqybI8obZJMZwPOrCIxDY48YQBeK9bds2cyzKBSuWDRo0qBK36hf/D37wA/NSj/es8JVXXjED2r///e+qwyGKnruR+vXry8GDB83vfPnwNcigY5HftnrBwPTLX/7SdBCWIzBoYYZk9xmqZoDzVT/k333hhRfM0l2HDh3MYBqMmcf56oe+gYCfOnXKNB0P9/Fv3HGePHlSg8OyrS9+//nPfwy/Tz/9VB577DEzSw9G8dQPKwSoAzihLzFYIf4wqOCOHakmPQNJIOvpq36hzC8U48+7f0Pt+vWun5XxD4/AsJSK6xwTIbywhNUu3Gh5Cx0228Cjj3feeacqjLdu3SoXXXSRuRnMzMw0P4f9Bx98YFb0LmixrO1i9xZLdASe2WBG2apVK5PQ4LrrrqsaeN1Yhv3HP/5hlsSQzu9nP/uZSQyPQStUCrZOwzIilupw4WFZGnxCoeAZM+4wPQUbheNlGiwtepbOg13P5cuXmzd0g7FacL62ey+DYeaB/oVIYvCHiGIFJVjFu36hzC8U48+bX6hdv971szP++bsMi8dFmPnjHNiAAxMdvBuCwmVYi1e4t1hi5ocpPR724u2oN9980wiEp7ghlvfcc48ZqDC4400uiCZmmaFSsDT3+9//3tzJ4aUnPNNCwIdiCeYyrC8eWPLEoOop6OdgzMw95/cerDyrBfh9RUWF3HnnneYliWAV7/qFOr/qnEIh/rz5hdr1610/O+NfTWLZpEkTwVIvyoMPPmheBELBdXf33XdXfb5Vk1hW93G+6+CCfMHH14CQl5dn7rD37dsXkDEDgymWDfCSRagWBBD2d2OJPAJ46QjP6rFMFSoz8sijHNwWhfL16+b4h9hGvnE8BnEqtimWwY1lnp0ESIAESCAMCFAsw6CTWEUSIAESIIHgEqBYBpc/z04CJEACJBAGBCiWYdBJrCIJkAAJkEBwCVAsg8ufZycBEiABEggDAhTLMOgkVpEELmQCyMqCgqT+LCQQLAIUy2CR53mDSuDVV181CfW9C14zR+YdJKpAkuYXX3zR0XoikwiSXG/evFl+9KMf+e37z3/+s7Rr104GDhx4zpZf//znPyUmJsbxDFDI8IPEBUhK7avYbY/fDRcxOYmjoqJqTERfUz2wSQE+UkeaP+yew0ICdghQLO1Qo03YExg7dqzJiYr0V0jajfyR2HcSKeGwMwey2SAzENL9OVmwgwuST3z++ecmdaK/BbuDIJcw8vN6f/eKnMYQS6d3NUFGJE8uZV/1tNsef9uN4373u9+Z9iFtoJ16gBv4If8yMj2xkIAdAhRLO9RoEzEEID5IhfXxxx/Lj3/8Y9OuESNGGLFEcgrMSlCQPxh7jmI7tSuuuMKIHfYMvPbaa02e2htvvFESEhLMB/7wg91GILZIko9Z0Y4dO0wuS8yAIJaYseF4/EFSCpwPs1rkquzXr5/JJIU8t8hPjPyt1cUSwo40Xtit5siRI0bwYestlthtoayszPjClnSYzSJbT8eOHY0Iop1IJo1zYF9AHIfk+cgshTR9SCmI7FKffPKJmWl7jsPH5Dt37jT5jJGvs6b2eAIFGVxQR+yjiZk7klpDuC655BKzkwTyeeI8OPell15qkoPgfEgJid14IJbYhxb9AXvkf0aOULAF64ceeshnPZD2DEu5aDvOg40TWEjAKgGKpVViPD6iCNQklhhckQEEM1AMykh0jzyqGOBRIKZIk4ik5JiVQpiw0wiWDZGUHwKDVIkQFwjU3//+d1mwYEFVWkPsQoJcxMg2go2YIaAQYwgFBBQ2EAKIYHWxhMhApCGUKEjTiHN7iyUSRUMAd+3aZfb8g+BgJwYkkcbmABB8pDTEUjN2cUCKOQj1N998I8jNChHD+bFzCm4CkDMZTCCUyHaF5V/cQHjSNHq3p/pm0shVimwyyC+MpdC4uDjDA36wM8QDDzxglr2xuw1mtPgZ6ovk7tiKzTNznjZtmtlCDzcSaDM2QMDNA9Kn+aoH0lviZgA3AhBgbF/FQgJWCVAsrRLj8RFFoCaxxGAPEcMgjOVGzIowK4FYYoNw/BwDPv4PccTMCbNNiB1mot///vfNz7BnKfINY0aE3Rdgh5/dcsstZlaLOmAWCoHAH8zqMKDjXNdcc405BjvTeJZhIW4QFeSaxQwLwuZLLHF+iDxErWfPnmbmiuVItA3LuRApzHhxQwBfmK1CiHBs9WVY5NREsn/k3YSoIsE+lqzhy1d7qi9hY8Ne5Ot8+eWXjbBBtLGzCM6J2SmWpLEDCc6NBPSYXaKN4IJzecQSXHBzgfrhRgD/ry6W5+OK+nIZNqIu26A0hmIZFOw8aagQqEksPc8sIXyYMSKRM2Z+EEcIJ2Yq2A8PS3wQS8y6UDDj+8UvfmFmnJjRYOaHUn0m5nlmiedwEA/st4djkSsYS7KY0eLFmr59+5pndji3RywXLlxozuXZ0g1igP+fb2YJUYKw4GUdzBohvEj8jWVRiCUEDS+/QHAwE4UgQuy9xRJChqVT1BV7YHbr1s0I3oEDB856Blu9PZ49YNH23Nxcs5wKMYQ4ghtmiBA+PCPGcis2L8Dv4d9TMJPHjYlHLD0bkKPtsH3vvfdMon/PzPJ8XCH6FMtQueLCtx4Uy/DtO9bcAQJ2xRLCg+VMCAKeS+IlGzz/wwwQy5FYtsXMD2+xYrkW4olZD2Zm1V/wqS4u2GsVO7vjOIgHdlGASHTv3t0sgXrE8qmnnjJvxmJWBeHBbNTXzLI2sURdBwwYYJZGIVTYNL36zBIijDrj3HieiDquW7fO7OSAGSC2l/PVnupiia7CHoOYLeImA0u7aCsK2gPBxFvCaAeWhr/zne+Y57uYVYKhRyxxU4H2FhQUmOeuWNqtPrOsSSyxVI4ZLv5mIQGrBCiWVonx+Igi4BFLzLaGDx9u2uZ5wcczs8RzRjxbqz6z9Igl3qT9wx/+IE2bNjW2ECc8q8SgD7FITEw0P8eADsHEiz4QF4gUhABLrhBYiABEAee6+OKLqxhj38lnnnnG/M7z6Qhe1sE5ISBYZvVs81bTzBLP6vD8DzNLPEfFsjBmljg/zglfqDNEHs9Y0T48Q7zsssuMuIHT66+/XjVLRjuxhItZqK/2eIul53kjRB7L22+//bZ5gQj1xrNR8MeSsIcZjkP9cCOCOmE2ic99JkyYYGbgx44dk7S0NMMWDH3VAy/14FyYieJln8cffzyiYpiNCQwBimVgOPMsEU4Ae6JiqfHLL788a0sgPMPEiztWtjnDG58QBSztYpZ3voKBHy8H4XwQLk2BEHl8eW9nhP1M8WwQbUDBTQNmh55ntJrz+rLFLHbPnj1V56x+HGaGuJnAEjWEHEvUeIHpjjvuqLEqaCO4IrGBL6ZutIU+I4cAxTJy+pItIYGIJ4DZLD5ZqV4gmHiuzEICbhKgWLpJl75JgAQcJ4A3k7GkjKVbvDSFpW0WEnCbAMXSbcL0TwIkQAIkEPYEKJZh34VsAAmQAAmQgNsEKJZuE6Z/EiABEiCBsCfwf+dWEQNsza/RAAAAAElFTkSuQmCC", - "text/plain": [ - "\n", - "\n", - "If you see this message, it means the renderer has not been properly enabled\n", - "for the frontend that you are using. For more information, see\n", - "https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting\n" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.unlinkables_chart()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/models/splink/WL_splink-0-3-tests.ipynb b/notebooks/models/splink/WL_splink-0-3-tests.ipynb deleted file mode 100644 index 4dc215a..0000000 --- a/notebooks/models/splink/WL_splink-0-3-tests.ipynb +++ /dev/null @@ -1,1264 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "835e6f04-c048-4bbd-b724-b9e2effabe36", - "metadata": {}, - "outputs": [], - "source": [ - " %load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "bfd37dca-f4b0-454e-adc8-ac298721a68c", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "from splink.duckdb.linker import DuckDBLinker\n", - "import splink.duckdb.comparison_library as cl\n", - "from splink.duckdb import blocking_rule_library as brl" - ] - }, - { - "cell_type": "markdown", - "id": "1eb64f89-6864-477c-9dd8-9b4c534d0a6d", - "metadata": {}, - "source": [ - "# Get it working\n", - "\n", - "By hook or by crook. Let's work out where the unit test is failing.\n", - "\n", - "Seems it's the m estimation. Can't estimate with one variable -- let's generate a second one here and see if I can just manually insert into the unit test." - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "id": "fad0e51d-fdc6-49a5-bf93-883727aadde1", - "metadata": {}, - "outputs": [], - "source": [ - "df_l = pd.read_csv(\"df_l.csv\")\n", - "df_r = pd.read_csv(\"df_r.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "0d33f969-3d1d-42f5-ab70-b6cab2f2e0d6", - "metadata": {}, - "outputs": [], - "source": [ - "df_l[\"rand\"] = range(df_l.shape[0])\n", - "df_r[\"rand\"] = range(df_r.shape[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "ced69860-2025-44d2-801d-96d1069cb689", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cluster_sha1crnrandrandish
0b'\\x9a\\xd5\\x82\\xe9\\xd5\\x96[\\xf1O\\x92\\x0b\\x0e\\x...01HHGX9BHARZT77WHVWCYJSWSF00
1b'\\x9a\\xd5\\x82\\xe9\\xd5\\x96[\\xf1O\\x92\\x0b\\x0e\\x...01HHGX9BHARZT77WHVWCYJSWSF11
2b'\\xa8G\\x1bvn\\x8e\\xa5\\x9e>t[\\xa9xj\\xfeX@\\xbcH{'01HHGX9BHF9HS4Z9E3FYGY7R9222
\n", - "
" - ], - "text/plain": [ - " cluster_sha1 \\\n", - "0 b'\\x9a\\xd5\\x82\\xe9\\xd5\\x96[\\xf1O\\x92\\x0b\\x0e\\x... \n", - "1 b'\\x9a\\xd5\\x82\\xe9\\xd5\\x96[\\xf1O\\x92\\x0b\\x0e\\x... \n", - "2 b'\\xa8G\\x1bvn\\x8e\\xa5\\x9e>t[\\xa9xj\\xfeX@\\xbcH{' \n", - "\n", - " crn rand randish \n", - "0 01HHGX9BHARZT77WHVWCYJSWSF 0 0 \n", - "1 01HHGX9BHARZT77WHVWCYJSWSF 1 1 \n", - "2 01HHGX9BHF9HS4Z9E3FYGY7R92 2 2 " - ] - }, - "execution_count": 56, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cluster_sha1crnrandrandish
0b'\\xe8LJ\\xac`\\xfd\\x17\\x94\\x00\\x11\\x81Y\\x8c\\x0f...01HHGX9BHARZT77WHVWCYJSWSF00
1b'\\xc3~\\xf2\\xfe|\\x89\\x88\\x84\\xb4\\x0f\\xe9`\\x04k...01HHGX9BHF9HS4Z9E3FYGY7R9211
2b'8\"\\xacm\\xca\\xb2I\\xb8\\xf9MY|6\\x85\\x1dm\\xc2\\xa...01HHGX9BHG70V8V6ZXVTJPJ7PX22
\n", - "
" - ], - "text/plain": [ - " cluster_sha1 \\\n", - "0 b'\\xe8LJ\\xac`\\xfd\\x17\\x94\\x00\\x11\\x81Y\\x8c\\x0f... \n", - "1 b'\\xc3~\\xf2\\xfe|\\x89\\x88\\x84\\xb4\\x0f\\xe9`\\x04k... \n", - "2 b'8\"\\xacm\\xca\\xb2I\\xb8\\xf9MY|6\\x85\\x1dm\\xc2\\xa... \n", - "\n", - " crn rand randish \n", - "0 01HHGX9BHARZT77WHVWCYJSWSF 0 0 \n", - "1 01HHGX9BHF9HS4Z9E3FYGY7R92 1 1 \n", - "2 01HHGX9BHG70V8V6ZXVTJPJ7PX 2 2 " - ] - }, - "execution_count": 56, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_l.head(3)\n", - "df_r.head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "cc9713ce-d749-4b3b-9519-47f6395583bd", - "metadata": {}, - "outputs": [], - "source": [ - "splink_settings={\n", - " \"link_type\": \"link_only\",\n", - " \"unique_id_column_name\": \"cluster_sha1\",\n", - " \"retain_matching_columns\": False,\n", - " \"retain_intermediate_calculation_columns\": False,\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " brl.block_on(\"crn\"),\n", - " brl.block_on(\"rand\")\n", - " ],\n", - " \"comparisons\": [\n", - " cl.exact_match(\"crn\"),\n", - " cl.exact_match(\"rand\")\n", - " ],\n", - "}\n", - "\n", - "linker = DuckDBLinker(\n", - " input_table_or_tables=[df_l, df_r],\n", - " input_table_aliases=[\"l\", \"r\"],\n", - " settings_dict=splink_settings,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "fbeeae81-3dfc-4cc2-82be-372ca8bf3401", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Probability two random records match is estimated to be 0.0019.\n", - "This means that amongst all possible pairwise record comparisons, one in 525.13 are expected to match. With 6,000,000 total possible comparisons, we expect a total of around 11,425.71 matching pairs\n" - ] - } - ], - "source": [ - "linker.estimate_probability_two_random_records_match(\n", - " deterministic_matching_rules=[\n", - " \"l.crn = r.crn\",\n", - " \"l.rand = r.rand\"\n", - " ],\n", - " recall=.7\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "ab2c6748-fb89-4e0a-ae36-7a70cde704d7", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "----- Estimating u probabilities using random sampling -----\n", - "\n", - "Estimated u probabilities using random sampling\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - crn (no m values are trained).\n", - " - rand (no m values are trained).\n" - ] - } - ], - "source": [ - "linker.estimate_u_using_random_sampling(\n", - " max_pairs=1e4\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "ea44aed0-d189-447d-b199-3f911d888e2b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "----- Starting EM training session -----\n", - "\n", - "Estimating the m probabilities of the model by blocking on:\n", - "l.\"crn\" = r.\"crn\"\n", - "\n", - "Parameter estimates will be made for the following comparison(s):\n", - " - rand\n", - "\n", - "Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: \n", - " - crn\n", - "\n", - "Iteration 1: Largest change in params was -0.946 in the m_probability of rand, level `Exact match`\n", - "Iteration 2: Largest change in params was 0.00191 in the m_probability of rand, level `All other comparisons`\n", - "Iteration 3: Largest change in params was 0.000631 in the m_probability of rand, level `All other comparisons`\n", - "Iteration 4: Largest change in params was -0.000309 in the m_probability of rand, level `Exact match`\n", - "Iteration 5: Largest change in params was -0.00018 in the m_probability of rand, level `Exact match`\n", - "Iteration 6: Largest change in params was -0.000116 in the m_probability of rand, level `Exact match`\n", - "Iteration 7: Largest change in params was 7.96e-05 in the m_probability of rand, level `All other comparisons`\n", - "\n", - "EM converged after 7 iterations\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - crn (no m values are trained).\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.estimate_parameters_using_expectation_maximisation(\n", - " blocking_rule = brl.block_on(\"crn\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "id": "65934988-5ef4-4f54-8fbe-ab5aee15d3cd", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "----- Starting EM training session -----\n", - "\n", - "Estimating the m probabilities of the model by blocking on:\n", - "l.\"rand\" = r.\"rand\"\n", - "\n", - "Parameter estimates will be made for the following comparison(s):\n", - " - crn\n", - "\n", - "Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: \n", - " - rand\n", - "\n", - "Iteration 1: Largest change in params was -0.234 in the m_probability of crn, level `Exact match`\n", - "Iteration 2: Largest change in params was 0.15 in the m_probability of crn, level `All other comparisons`\n", - "Iteration 3: Largest change in params was 0.104 in the m_probability of crn, level `All other comparisons`\n", - "Iteration 4: Largest change in params was -0.0765 in the m_probability of crn, level `Exact match`\n", - "Iteration 5: Largest change in params was -0.0583 in the m_probability of crn, level `Exact match`\n", - "Iteration 6: Largest change in params was 0.0458 in the m_probability of crn, level `All other comparisons`\n", - "Iteration 7: Largest change in params was -0.0369 in the m_probability of crn, level `Exact match`\n", - "Iteration 8: Largest change in params was -0.0302 in the m_probability of crn, level `Exact match`\n", - "Iteration 9: Largest change in params was 0.0252 in the m_probability of crn, level `All other comparisons`\n", - "Iteration 10: Largest change in params was -0.0212 in the m_probability of crn, level `Exact match`\n", - "Iteration 11: Largest change in params was -0.0181 in the m_probability of crn, level `Exact match`\n", - "Iteration 12: Largest change in params was 0.0156 in the m_probability of crn, level `All other comparisons`\n", - "Iteration 13: Largest change in params was -0.0135 in the m_probability of crn, level `Exact match`\n", - "Iteration 14: Largest change in params was 0.0118 in the m_probability of crn, level `All other comparisons`\n", - "Iteration 15: Largest change in params was -0.0104 in the m_probability of crn, level `Exact match`\n", - "Iteration 16: Largest change in params was 0.00914 in the m_probability of crn, level `All other comparisons`\n", - "Iteration 17: Largest change in params was 0.00811 in the m_probability of crn, level `All other comparisons`\n", - "Iteration 18: Largest change in params was -0.00723 in the m_probability of crn, level `Exact match`\n", - "Iteration 19: Largest change in params was -0.00646 in the m_probability of crn, level `Exact match`\n", - "Iteration 20: Largest change in params was 0.0058 in the m_probability of crn, level `All other comparisons`\n", - "Iteration 21: Largest change in params was -0.00522 in the m_probability of crn, level `Exact match`\n", - "Iteration 22: Largest change in params was 0.00471 in the m_probability of crn, level `All other comparisons`\n", - "Iteration 23: Largest change in params was 0.00426 in the m_probability of crn, level `All other comparisons`\n", - "Iteration 24: Largest change in params was 0.00386 in the m_probability of crn, level `All other comparisons`\n", - "Iteration 25: Largest change in params was -0.00351 in the m_probability of crn, level `Exact match`\n", - "\n", - "EM converged after 25 iterations\n", - "\n", - "Your model is fully trained. All comparisons have at least one estimate for their m and u values\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.estimate_parameters_using_expectation_maximisation(\n", - " blocking_rule = brl.block_on(\"rand\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "id": "3a3f8e78-c857-4d7a-b541-fc0029eed47a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'link_type': 'link_only',\n", - " 'unique_id_column_name': 'cluster_sha1',\n", - " 'retain_matching_columns': False,\n", - " 'retain_intermediate_calculation_columns': False,\n", - " 'blocking_rules_to_generate_predictions': [{'blocking_rule': 'l.\"crn\" = r.\"crn\"',\n", - " 'sql_dialect': 'duckdb'},\n", - " {'blocking_rule': 'l.\"rand\" = r.\"rand\"', 'sql_dialect': 'duckdb'}],\n", - " 'comparisons': [{'output_column_name': 'crn',\n", - " 'comparison_levels': [{'sql_condition': '\"crn_l\" IS NULL OR \"crn_r\" IS NULL',\n", - " 'label_for_charts': 'Null',\n", - " 'is_null_level': True},\n", - " {'sql_condition': '\"crn_l\" = \"crn_r\"',\n", - " 'label_for_charts': 'Exact match',\n", - " 'm_probability': 0.0395690095371488,\n", - " 'u_probability': 0.0010774806543246156},\n", - " {'sql_condition': 'ELSE',\n", - " 'label_for_charts': 'All other comparisons',\n", - " 'm_probability': 0.9604309904628512,\n", - " 'u_probability': 0.9989225193456753}],\n", - " 'comparison_description': 'Exact match vs. anything else'},\n", - " {'output_column_name': 'rand',\n", - " 'comparison_levels': [{'sql_condition': '\"rand_l\" IS NULL OR \"rand_r\" IS NULL',\n", - " 'label_for_charts': 'Null',\n", - " 'is_null_level': True},\n", - " {'sql_condition': '\"rand_l\" = \"rand_r\"',\n", - " 'label_for_charts': 'Exact match',\n", - " 'm_probability': 0.0010481235489718066,\n", - " 'u_probability': 0.0002938583602703497},\n", - " {'sql_condition': 'ELSE',\n", - " 'label_for_charts': 'All other comparisons',\n", - " 'm_probability': 0.9989518764510282,\n", - " 'u_probability': 0.9997061416397296}],\n", - " 'comparison_description': 'Exact match vs. anything else'}],\n", - " 'sql_dialect': 'duckdb',\n", - " 'linker_uid': '8i8mhvh3',\n", - " 'probability_two_random_records_match': 0.0019042857142857145}" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.save_model_to_json()" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "bb6ddd45-489a-40f7-93f6-b3d9d1c49753", - "metadata": {}, - "outputs": [], - "source": [ - "pred = linker.predict()" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "1b9fc0ad-3fce-4def-9116-66211e850221", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
match_weightmatch_probabilitysource_dataset_lsource_dataset_rcluster_sha1_lcluster_sha1_rmatch_key
7997-2.0005310.199941lrb'\\xa9=)g{\\x96x7gq5\\xb4h\\xa5N\\xe0\\xc9\\xdd$\\xb8'b'\\xfc;2\\xecW\\xe5+g\\x97\\xf7\\xa0/&\\x1f\\xac\\xe1\\...0
7996-2.0005310.199941lrb'\\x9a\\xd5\\x82\\xe9\\xd5\\x96[\\xf1O\\x92\\x0b\\x0e\\x...b'\\xe8LJ\\xac`\\xfd\\x17\\x94\\x00\\x11\\x81Y\\x8c\\x0f...0
4037-3.8362360.065432lrb'w\\xb8_\\x96e\\xec\\xd1\\xa7\\xe3P\\xab\\x15\\xb7\\x18...b'\\x91\\tP\\x81\\xdb^\\xf4]~\\xfd\\xe9e6\\x01\\xba\\xbc...0
4036-3.8362360.065432lrb'\\xbf\\x19E\\xa4\\xff\\x01\\x86L\\xfe\\xc5\\xde\\xc4\\x...b'\\x14\\x97p\\xda\\xaf$-^6A\\xdb\\xc0a\\xa2\\xa6\\x97\\...0
4035-3.8362360.065432lrb'\\xbf\\x19E\\xa4\\xff\\x01\\x86L\\xfe\\xc5\\xde\\xc4\\x...b'\\x14\\x97p\\xda\\xaf$-^6A\\xdb\\xc0a\\xa2\\xa6\\x97\\...0
........................
32-7.2558590.006500lrb'9$\\x90\\xe4\\x13\\xb99\\x9d\\xf4\\xae\\xb0\\x10\\xafS...b'\\r\\xcaA\\xa6\\xce\\xc5|4\\xba\\xffR(\\x9d\\xe5\\x14d...1
33-7.2558590.006500lrb'\\x8dcy\\xe3\\xb6|\\x07fCqy%\\x7f\\x1b\\xb4\\xbb\\x85...b'\\xda\\xad\\x05\\x0eP\\xe7\\x10\\xc6\\xa5K\\xa1h\\xdaF...1
34-7.2558590.006500lrb'\\x8dcy\\xe3\\xb6|\\x07fCqy%\\x7f\\x1b\\xb4\\xbb\\x85...b\"\\x9b\\x8e)\\x7f\\x83\\xaa\\x1f\\xca\\xe1\\xfa{@n('\\x...1
35-7.2558590.006500lrb'.\\x89\\xaa\\xd5\\x0f\\t\\xbe\\xbc@\\x12)_~\\xe6\\xb6\\...b'\\x17S\\xaf\\xc2\\x81-yeX\\x94\\xca\\xe2\\x0eo\\x0b\\x...1
36-7.2558590.006500lrb'.\\x89\\xaa\\xd5\\x0f\\t\\xbe\\xbc@\\x12)_~\\xe6\\xb6\\...b'\\x88\\x0f\\x1d\\xaf^\\x91\\xda\\xfe\\xdf\\x9a\\x9d(\\x...1
\n", - "

7998 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " match_weight match_probability source_dataset_l source_dataset_r \\\n", - "7997 -2.000531 0.199941 l r \n", - "7996 -2.000531 0.199941 l r \n", - "4037 -3.836236 0.065432 l r \n", - "4036 -3.836236 0.065432 l r \n", - "4035 -3.836236 0.065432 l r \n", - "... ... ... ... ... \n", - "32 -7.255859 0.006500 l r \n", - "33 -7.255859 0.006500 l r \n", - "34 -7.255859 0.006500 l r \n", - "35 -7.255859 0.006500 l r \n", - "36 -7.255859 0.006500 l r \n", - "\n", - " cluster_sha1_l \\\n", - "7997 b'\\xa9=)g{\\x96x7gq5\\xb4h\\xa5N\\xe0\\xc9\\xdd$\\xb8' \n", - "7996 b'\\x9a\\xd5\\x82\\xe9\\xd5\\x96[\\xf1O\\x92\\x0b\\x0e\\x... \n", - "4037 b'w\\xb8_\\x96e\\xec\\xd1\\xa7\\xe3P\\xab\\x15\\xb7\\x18... \n", - "4036 b'\\xbf\\x19E\\xa4\\xff\\x01\\x86L\\xfe\\xc5\\xde\\xc4\\x... \n", - "4035 b'\\xbf\\x19E\\xa4\\xff\\x01\\x86L\\xfe\\xc5\\xde\\xc4\\x... \n", - "... ... \n", - "32 b'9$\\x90\\xe4\\x13\\xb99\\x9d\\xf4\\xae\\xb0\\x10\\xafS... \n", - "33 b'\\x8dcy\\xe3\\xb6|\\x07fCqy%\\x7f\\x1b\\xb4\\xbb\\x85... \n", - "34 b'\\x8dcy\\xe3\\xb6|\\x07fCqy%\\x7f\\x1b\\xb4\\xbb\\x85... \n", - "35 b'.\\x89\\xaa\\xd5\\x0f\\t\\xbe\\xbc@\\x12)_~\\xe6\\xb6\\... \n", - "36 b'.\\x89\\xaa\\xd5\\x0f\\t\\xbe\\xbc@\\x12)_~\\xe6\\xb6\\... \n", - "\n", - " cluster_sha1_r match_key \n", - "7997 b'\\xfc;2\\xecW\\xe5+g\\x97\\xf7\\xa0/&\\x1f\\xac\\xe1\\... 0 \n", - "7996 b'\\xe8LJ\\xac`\\xfd\\x17\\x94\\x00\\x11\\x81Y\\x8c\\x0f... 0 \n", - "4037 b'\\x91\\tP\\x81\\xdb^\\xf4]~\\xfd\\xe9e6\\x01\\xba\\xbc... 0 \n", - "4036 b'\\x14\\x97p\\xda\\xaf$-^6A\\xdb\\xc0a\\xa2\\xa6\\x97\\... 0 \n", - "4035 b'\\x14\\x97p\\xda\\xaf$-^6A\\xdb\\xc0a\\xa2\\xa6\\x97\\... 0 \n", - "... ... ... \n", - "32 b'\\r\\xcaA\\xa6\\xce\\xc5|4\\xba\\xffR(\\x9d\\xe5\\x14d... 1 \n", - "33 b'\\xda\\xad\\x05\\x0eP\\xe7\\x10\\xc6\\xa5K\\xa1h\\xdaF... 1 \n", - "34 b\"\\x9b\\x8e)\\x7f\\x83\\xaa\\x1f\\xca\\xe1\\xfa{@n('\\x... 1 \n", - "35 b'\\x17S\\xaf\\xc2\\x81-yeX\\x94\\xca\\xe2\\x0eo\\x0b\\x... 1 \n", - "36 b'\\x88\\x0f\\x1d\\xaf^\\x91\\xda\\xfe\\xdf\\x9a\\x9d(\\x... 1 \n", - "\n", - "[7998 rows x 7 columns]" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pred.as_pandas_dataframe().sort_values(\"match_probability\", ascending=False)" - ] - }, - { - "cell_type": "markdown", - "id": "a15ab2ff-a17a-410e-af8f-cc8693d0d6ef", - "metadata": {}, - "source": [ - "# Refine for unit test\n", - "\n", - "It works, but just gives a terrible answer cause we're using it wrong. Let's persist for a while rather than refactoring all my unit tests.\n", - "\n", - "Let's see if I can insert that m param.\n", - "\n", - "It physically works! But I'm going to tweak it so a very simple deterministic unit test will work." - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "id": "7e030de6-69a5-486e-9d0a-ce7b7ae3b1c8", - "metadata": {}, - "outputs": [], - "source": [ - "df_l = pd.read_csv(\"df_l.csv\")\n", - "df_r = pd.read_csv(\"df_r.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 103, - "id": "cf7d6f26-6e31-4651-844c-e2fb2e874aaa", - "metadata": {}, - "outputs": [], - "source": [ - "splink_settings={\n", - " \"link_type\": \"link_only\",\n", - " \"unique_id_column_name\": \"cluster_sha1\",\n", - " \"retain_matching_columns\": False,\n", - " \"retain_intermediate_calculation_columns\": False,\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " brl.block_on(\"crn\")\n", - " ],\n", - " \"comparisons\": [\n", - " cl.exact_match(\"crn\", m_probability_exact_match=1)\n", - " ],\n", - "}\n", - "\n", - "linker = DuckDBLinker(\n", - " input_table_or_tables=[df_l, df_r],\n", - " input_table_aliases=[\"l\", \"r\"],\n", - " settings_dict=splink_settings,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "id": "3e809e75-2f96-4dbf-a304-f681391f7069", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Probability two random records match is estimated to be 0.001.\n", - "This means that amongst all possible pairwise record comparisons, one in 1,000.00 are expected to match. With 6,000,000 total possible comparisons, we expect a total of around 6,000.00 matching pairs\n" - ] - } - ], - "source": [ - "linker.estimate_probability_two_random_records_match(\n", - " deterministic_matching_rules=[\n", - " \"l.crn = r.crn\"\n", - " ],\n", - " recall=1\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 105, - "id": "e52eada3-1d4e-4545-a3fe-b9798073b1c6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "----- Estimating u probabilities using random sampling -----\n", - "\n", - "Estimated u probabilities using random sampling\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - crn (some m values are not trained).\n" - ] - } - ], - "source": [ - "linker.estimate_u_using_random_sampling(\n", - " max_pairs=1e4\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 106, - "id": "e153290c-879f-481d-a5bd-3da982ad69fa", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - " -- WARNING --\n", - "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. To produce predictions the following untrained trained parameters will use default values.\n", - "Comparison: 'crn':\n", - " m values not fully trained\n" - ] - } - ], - "source": [ - "pred = linker.predict()" - ] - }, - { - "cell_type": "code", - "execution_count": 108, - "id": "03e26e1b-6984-4702-b738-e41f39237b96", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
match_probabilitycluster_sha1_lcluster_sha1_rcluster_sha1_xcrn_xcluster_sha1_ycrn_y
359400.546715b'H\\xab\\xf0\\xcf)O\\xec\\xa7\\x96\\xd9\\x98t/\\x02\\xc...b'X\\x05\\xddi\\xe2\\xbd\\xf2u\\x15\\x87~W\\x0c\\xb1s\\x...b'H\\xab\\xf0\\xcf)O\\xec\\xa7\\x96\\xd9\\x98t/\\x02\\xc...01HHGX9CPBZF8HCV0EZ53PFCQEb'X\\x05\\xddi\\xe2\\xbd\\xf2u\\x15\\x87~W\\x0c\\xb1s\\x...01HHGX9CPBZF8HCV0EZ53PFCQE
00.546715b'\\x9a\\xd5\\x82\\xe9\\xd5\\x96[\\xf1O\\x92\\x0b\\x0e\\x...b'\\xe8LJ\\xac`\\xfd\\x17\\x94\\x00\\x11\\x81Y\\x8c\\x0f...b'\\x9a\\xd5\\x82\\xe9\\xd5\\x96[\\xf1O\\x92\\x0b\\x0e\\x...01HHGX9BHARZT77WHVWCYJSWSFb'\\xe8LJ\\xac`\\xfd\\x17\\x94\\x00\\x11\\x81Y\\x8c\\x0f...01HHGX9BHARZT77WHVWCYJSWSF
120.546715b'\\xa8G\\x1bvn\\x8e\\xa5\\x9e>t[\\xa9xj\\xfeX@\\xbcH{'b'\\xc3~\\xf2\\xfe|\\x89\\x88\\x84\\xb4\\x0f\\xe9`\\x04k...b'\\xa8G\\x1bvn\\x8e\\xa5\\x9e>t[\\xa9xj\\xfeX@\\xbcH{'01HHGX9BHF9HS4Z9E3FYGY7R92b'\\xc3~\\xf2\\xfe|\\x89\\x88\\x84\\xb4\\x0f\\xe9`\\x04k...01HHGX9BHF9HS4Z9E3FYGY7R92
240.546715b'A*T{\\xd0\\x96y_W\\x07`\\x0b#\\x94Fy7\\xc9\\xa6X'b'8\"\\xacm\\xca\\xb2I\\xb8\\xf9MY|6\\x85\\x1dm\\xc2\\xa...b'A*T{\\xd0\\x96y_W\\x07`\\x0b#\\x94Fy7\\xc9\\xa6X'01HHGX9BHG70V8V6ZXVTJPJ7PXb'8\"\\xacm\\xca\\xb2I\\xb8\\xf9MY|6\\x85\\x1dm\\xc2\\xa...01HHGX9BHG70V8V6ZXVTJPJ7PX
360.546715b'\\xed\\x83\\x16\\xca\\xe4\\x88o\\x8a\\xb5/\\x89\\x8f\\x...b'9\\xa8\\x8b\\xc3\\xe8\\xf7d\\xf3\\xcf1t\\xfb\\x9f\\xd8...b'\\xed\\x83\\x16\\xca\\xe4\\x88o\\x8a\\xb5/\\x89\\x8f\\x...01HHGX9BHH87FFA2CPCJRXNJJ7b'9\\xa8\\x8b\\xc3\\xe8\\xf7d\\xf3\\xcf1t\\xfb\\x9f\\xd8...01HHGX9BHH87FFA2CPCJRXNJJ7
........................
1320.546715b'c\\\\e\\x1c\\xf4\\xc9\\xfdG\\xea\\xe1\\x8e\\x01\\xe3\\x0...b'2(\\xe3\\xbf\\x82\\\\\\xcf9\\xacUG\\xbc\\xa5\\x9dq3Z\\x...b'c\\\\e\\x1c\\xf4\\xc9\\xfdG\\xea\\xe1\\x8e\\x01\\xe3\\x0...01HHGX9BHXBJ5TE9FYN2CMPR8Gb'2(\\xe3\\xbf\\x82\\\\\\xcf9\\xacUG\\xbc\\xa5\\x9dq3Z\\x...01HHGX9BHXBJ5TE9FYN2CMPR8G
1440.546715b'\\xf1\\xa8\\xdaDx\\xcc\\x04\\xde\\x0bB\\xde\\x9d\\xd8\\...b'\\x8b\\x19\\xaf[F\\xa10I?\\xc2\\xca\\x7f\\xc6\\xc9\\x8...b'\\xf1\\xa8\\xdaDx\\xcc\\x04\\xde\\x0bB\\xde\\x9d\\xd8\\...01HHGX9BHXR5W5YVHR03GN8NEHb'\\x8b\\x19\\xaf[F\\xa10I?\\xc2\\xca\\x7f\\xc6\\xc9\\x8...01HHGX9BHXR5W5YVHR03GN8NEH
1560.546715b'\\xcaE\\x1ba5\\t e\\n\\xc4\\x8c\\xe2,\\xe3\\x1c\\xed\\x...b'\\xb3\\x07\\xf9\\x82\\xdcUB\\x02\\xd3\\xa1&\\x0f\\xa0\\...b'\\xcaE\\x1ba5\\t e\\n\\xc4\\x8c\\xe2,\\xe3\\x1c\\xed\\x...01HHGX9BHYX2QM6WVDCG77A1W4b'\\xb3\\x07\\xf9\\x82\\xdcUB\\x02\\xd3\\xa1&\\x0f\\xa0\\...01HHGX9BHYX2QM6WVDCG77A1W4
1680.546715b'\\xad\\x19\\x85\\\\\\xe4`\\x8b,!\\xb2\\xa5kO\\xe0\\x82\\...b'\\x10n\\xcee\\xb0\\xde{\\x9f\\xa3vz\\tb\\xa4i\\x83\\xc...b'\\xad\\x19\\x85\\\\\\xe4`\\x8b,!\\xb2\\xa5kO\\xe0\\x82\\...01HHGX9BHZVGFX3QJVQBGK8B83b'\\x10n\\xcee\\xb0\\xde{\\x9f\\xa3vz\\tb\\xa4i\\x83\\xc...01HHGX9BHZVGFX3QJVQBGK8B83
1800.546715b'9$\\x90\\xe4\\x13\\xb99\\x9d\\xf4\\xae\\xb0\\x10\\xafS...b'\\x8c\\xcf\\xea\\x1e\\xd2(V\\x9e\\x11t\\xd2+>*\\x01\\x...b'9$\\x90\\xe4\\x13\\xb99\\x9d\\xf4\\xae\\xb0\\x10\\xafS...01HHGX9BJ0AKH75FT9S15B2JSSb'\\x8c\\xcf\\xea\\x1e\\xd2(V\\x9e\\x11t\\xd2+>*\\x01\\x...01HHGX9BJ0AKH75FT9S15B2JSS
\n", - "

1000 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " match_probability cluster_sha1_l \\\n", - "35940 0.546715 b'H\\xab\\xf0\\xcf)O\\xec\\xa7\\x96\\xd9\\x98t/\\x02\\xc... \n", - "0 0.546715 b'\\x9a\\xd5\\x82\\xe9\\xd5\\x96[\\xf1O\\x92\\x0b\\x0e\\x... \n", - "12 0.546715 b'\\xa8G\\x1bvn\\x8e\\xa5\\x9e>t[\\xa9xj\\xfeX@\\xbcH{' \n", - "24 0.546715 b'A*T{\\xd0\\x96y_W\\x07`\\x0b#\\x94Fy7\\xc9\\xa6X' \n", - "36 0.546715 b'\\xed\\x83\\x16\\xca\\xe4\\x88o\\x8a\\xb5/\\x89\\x8f\\x... \n", - "... ... ... \n", - "132 0.546715 b'c\\\\e\\x1c\\xf4\\xc9\\xfdG\\xea\\xe1\\x8e\\x01\\xe3\\x0... \n", - "144 0.546715 b'\\xf1\\xa8\\xdaDx\\xcc\\x04\\xde\\x0bB\\xde\\x9d\\xd8\\... \n", - "156 0.546715 b'\\xcaE\\x1ba5\\t e\\n\\xc4\\x8c\\xe2,\\xe3\\x1c\\xed\\x... \n", - "168 0.546715 b'\\xad\\x19\\x85\\\\\\xe4`\\x8b,!\\xb2\\xa5kO\\xe0\\x82\\... \n", - "180 0.546715 b'9$\\x90\\xe4\\x13\\xb99\\x9d\\xf4\\xae\\xb0\\x10\\xafS... \n", - "\n", - " cluster_sha1_r \\\n", - "35940 b'X\\x05\\xddi\\xe2\\xbd\\xf2u\\x15\\x87~W\\x0c\\xb1s\\x... \n", - "0 b'\\xe8LJ\\xac`\\xfd\\x17\\x94\\x00\\x11\\x81Y\\x8c\\x0f... \n", - "12 b'\\xc3~\\xf2\\xfe|\\x89\\x88\\x84\\xb4\\x0f\\xe9`\\x04k... \n", - "24 b'8\"\\xacm\\xca\\xb2I\\xb8\\xf9MY|6\\x85\\x1dm\\xc2\\xa... \n", - "36 b'9\\xa8\\x8b\\xc3\\xe8\\xf7d\\xf3\\xcf1t\\xfb\\x9f\\xd8... \n", - "... ... \n", - "132 b'2(\\xe3\\xbf\\x82\\\\\\xcf9\\xacUG\\xbc\\xa5\\x9dq3Z\\x... \n", - "144 b'\\x8b\\x19\\xaf[F\\xa10I?\\xc2\\xca\\x7f\\xc6\\xc9\\x8... \n", - "156 b'\\xb3\\x07\\xf9\\x82\\xdcUB\\x02\\xd3\\xa1&\\x0f\\xa0\\... \n", - "168 b'\\x10n\\xcee\\xb0\\xde{\\x9f\\xa3vz\\tb\\xa4i\\x83\\xc... \n", - "180 b'\\x8c\\xcf\\xea\\x1e\\xd2(V\\x9e\\x11t\\xd2+>*\\x01\\x... \n", - "\n", - " cluster_sha1_x \\\n", - "35940 b'H\\xab\\xf0\\xcf)O\\xec\\xa7\\x96\\xd9\\x98t/\\x02\\xc... \n", - "0 b'\\x9a\\xd5\\x82\\xe9\\xd5\\x96[\\xf1O\\x92\\x0b\\x0e\\x... \n", - "12 b'\\xa8G\\x1bvn\\x8e\\xa5\\x9e>t[\\xa9xj\\xfeX@\\xbcH{' \n", - "24 b'A*T{\\xd0\\x96y_W\\x07`\\x0b#\\x94Fy7\\xc9\\xa6X' \n", - "36 b'\\xed\\x83\\x16\\xca\\xe4\\x88o\\x8a\\xb5/\\x89\\x8f\\x... \n", - "... ... \n", - "132 b'c\\\\e\\x1c\\xf4\\xc9\\xfdG\\xea\\xe1\\x8e\\x01\\xe3\\x0... \n", - "144 b'\\xf1\\xa8\\xdaDx\\xcc\\x04\\xde\\x0bB\\xde\\x9d\\xd8\\... \n", - "156 b'\\xcaE\\x1ba5\\t e\\n\\xc4\\x8c\\xe2,\\xe3\\x1c\\xed\\x... \n", - "168 b'\\xad\\x19\\x85\\\\\\xe4`\\x8b,!\\xb2\\xa5kO\\xe0\\x82\\... \n", - "180 b'9$\\x90\\xe4\\x13\\xb99\\x9d\\xf4\\xae\\xb0\\x10\\xafS... \n", - "\n", - " crn_x \\\n", - "35940 01HHGX9CPBZF8HCV0EZ53PFCQE \n", - "0 01HHGX9BHARZT77WHVWCYJSWSF \n", - "12 01HHGX9BHF9HS4Z9E3FYGY7R92 \n", - "24 01HHGX9BHG70V8V6ZXVTJPJ7PX \n", - "36 01HHGX9BHH87FFA2CPCJRXNJJ7 \n", - "... ... \n", - "132 01HHGX9BHXBJ5TE9FYN2CMPR8G \n", - "144 01HHGX9BHXR5W5YVHR03GN8NEH \n", - "156 01HHGX9BHYX2QM6WVDCG77A1W4 \n", - "168 01HHGX9BHZVGFX3QJVQBGK8B83 \n", - "180 01HHGX9BJ0AKH75FT9S15B2JSS \n", - "\n", - " cluster_sha1_y \\\n", - "35940 b'X\\x05\\xddi\\xe2\\xbd\\xf2u\\x15\\x87~W\\x0c\\xb1s\\x... \n", - "0 b'\\xe8LJ\\xac`\\xfd\\x17\\x94\\x00\\x11\\x81Y\\x8c\\x0f... \n", - "12 b'\\xc3~\\xf2\\xfe|\\x89\\x88\\x84\\xb4\\x0f\\xe9`\\x04k... \n", - "24 b'8\"\\xacm\\xca\\xb2I\\xb8\\xf9MY|6\\x85\\x1dm\\xc2\\xa... \n", - "36 b'9\\xa8\\x8b\\xc3\\xe8\\xf7d\\xf3\\xcf1t\\xfb\\x9f\\xd8... \n", - "... ... \n", - "132 b'2(\\xe3\\xbf\\x82\\\\\\xcf9\\xacUG\\xbc\\xa5\\x9dq3Z\\x... \n", - "144 b'\\x8b\\x19\\xaf[F\\xa10I?\\xc2\\xca\\x7f\\xc6\\xc9\\x8... \n", - "156 b'\\xb3\\x07\\xf9\\x82\\xdcUB\\x02\\xd3\\xa1&\\x0f\\xa0\\... \n", - "168 b'\\x10n\\xcee\\xb0\\xde{\\x9f\\xa3vz\\tb\\xa4i\\x83\\xc... \n", - "180 b'\\x8c\\xcf\\xea\\x1e\\xd2(V\\x9e\\x11t\\xd2+>*\\x01\\x... \n", - "\n", - " crn_y \n", - "35940 01HHGX9CPBZF8HCV0EZ53PFCQE \n", - "0 01HHGX9BHARZT77WHVWCYJSWSF \n", - "12 01HHGX9BHF9HS4Z9E3FYGY7R92 \n", - "24 01HHGX9BHG70V8V6ZXVTJPJ7PX \n", - "36 01HHGX9BHH87FFA2CPCJRXNJJ7 \n", - "... ... \n", - "132 01HHGX9BHXBJ5TE9FYN2CMPR8G \n", - "144 01HHGX9BHXR5W5YVHR03GN8NEH \n", - "156 01HHGX9BHYX2QM6WVDCG77A1W4 \n", - "168 01HHGX9BHZVGFX3QJVQBGK8B83 \n", - "180 01HHGX9BJ0AKH75FT9S15B2JSS \n", - "\n", - "[1000 rows x 7 columns]" - ] - }, - "execution_count": 108, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(\n", - " pred\n", - " .as_pandas_dataframe()\n", - " .filter([\"match_probability\", \"cluster_sha1_l\", \"cluster_sha1_r\"])\n", - " .merge(\n", - " df_l,\n", - " how=\"left\",\n", - " left_on=\"cluster_sha1_l\",\n", - " right_on=\"cluster_sha1\"\n", - " )\n", - " .merge(\n", - " df_r,\n", - " how=\"left\",\n", - " left_on=\"cluster_sha1_r\",\n", - " right_on=\"cluster_sha1\"\n", - " )\n", - " .drop_duplicates()\n", - " .sort_values(\"match_probability\", ascending=False)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "662c9816-08de-4c8c-93a0-889a45f7e3ce", - "metadata": {}, - "source": [ - "# Reshaping\n", - "\n", - "Just working through this." - ] - }, - { - "cell_type": "code", - "execution_count": 118, - "id": "d005396a-66fe-4f9a-a729-319eda0adbea", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
probabilityleft_idright_id
00.546715b'\\x9a\\xd5\\x82\\xe9\\xd5\\x96[\\xf1O\\x92\\x0b\\x0e\\x...b'\\xe8LJ\\xac`\\xfd\\x17\\x94\\x00\\x11\\x81Y\\x8c\\x0f...
20.546715b'\\xa8G\\x1bvn\\x8e\\xa5\\x9e>t[\\xa9xj\\xfeX@\\xbcH{'b'\\xc3~\\xf2\\xfe|\\x89\\x88\\x84\\xb4\\x0f\\xe9`\\x04k...
40.546715b'A*T{\\xd0\\x96y_W\\x07`\\x0b#\\x94Fy7\\xc9\\xa6X'b'8\"\\xacm\\xca\\xb2I\\xb8\\xf9MY|6\\x85\\x1dm\\xc2\\xa...
60.546715b'\\xed\\x83\\x16\\xca\\xe4\\x88o\\x8a\\xb5/\\x89\\x8f\\x...b'9\\xa8\\x8b\\xc3\\xe8\\xf7d\\xf3\\xcf1t\\xfb\\x9f\\xd8...
80.546715b'e}\\x0e\\x1dA\\x8d\\xe1\\x13*\\xcd\\x80{7\\x180q\\xc7...b'b\\x9c}\\xb0!\\x9b\\x8f\\xad|\\xfb&\\xfa\\xb3\\x80\\t\\...
............
57740.546715b'\\xdc\\xabO\\xb8\\xf2\\xfe\\xdd\\x06\\x9f\\xb0\\x19\\xe...b'\\xbb4\\x12h\\x10\\xc8o\\xeb\\xb7.\\xfb\\xa4\\xae\\xe5...
57760.546715b'\\xa7#$$\\xab!\\x08\\xfbW\\xe8\\xc7\\x05\\x83iG\\x10h...b'\\xcd\\x18\\xfb/E\\xd2\\x08B4t\\xf2a{\\xfd\\xf5\\xa5\\...
57780.546715b'\\xedz\\x94;m\\xd2w\\x17g\\xdcjo\\x8a\\\\\\xa7\\xc2\\t\\...b'_U\\xba\\x1d\\xe7\\x9f\\xc9\\xad?\\xcd\\x85Z\\xd3\\x04...
59880.546715b'Y\\xbd\\x1c0\\xd0!\\xc7\\x17\\xa8\\x81\\xf4\\xc5\\xb1\\...b'L=\\x95\\x82J\\x81\\xc5A\\x05\\xbf:#\\t+\\xc0\\x80\\xd...
59900.546715b'H\\xab\\xf0\\xcf)O\\xec\\xa7\\x96\\xd9\\x98t/\\x02\\xc...b'X\\x05\\xddi\\xe2\\xbd\\xf2u\\x15\\x87~W\\x0c\\xb1s\\x...
\n", - "

1000 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " probability left_id \\\n", - "0 0.546715 b'\\x9a\\xd5\\x82\\xe9\\xd5\\x96[\\xf1O\\x92\\x0b\\x0e\\x... \n", - "2 0.546715 b'\\xa8G\\x1bvn\\x8e\\xa5\\x9e>t[\\xa9xj\\xfeX@\\xbcH{' \n", - "4 0.546715 b'A*T{\\xd0\\x96y_W\\x07`\\x0b#\\x94Fy7\\xc9\\xa6X' \n", - "6 0.546715 b'\\xed\\x83\\x16\\xca\\xe4\\x88o\\x8a\\xb5/\\x89\\x8f\\x... \n", - "8 0.546715 b'e}\\x0e\\x1dA\\x8d\\xe1\\x13*\\xcd\\x80{7\\x180q\\xc7... \n", - "... ... ... \n", - "5774 0.546715 b'\\xdc\\xabO\\xb8\\xf2\\xfe\\xdd\\x06\\x9f\\xb0\\x19\\xe... \n", - "5776 0.546715 b'\\xa7#$$\\xab!\\x08\\xfbW\\xe8\\xc7\\x05\\x83iG\\x10h... \n", - "5778 0.546715 b'\\xedz\\x94;m\\xd2w\\x17g\\xdcjo\\x8a\\\\\\xa7\\xc2\\t\\... \n", - "5988 0.546715 b'Y\\xbd\\x1c0\\xd0!\\xc7\\x17\\xa8\\x81\\xf4\\xc5\\xb1\\... \n", - "5990 0.546715 b'H\\xab\\xf0\\xcf)O\\xec\\xa7\\x96\\xd9\\x98t/\\x02\\xc... \n", - "\n", - " right_id \n", - "0 b'\\xe8LJ\\xac`\\xfd\\x17\\x94\\x00\\x11\\x81Y\\x8c\\x0f... \n", - "2 b'\\xc3~\\xf2\\xfe|\\x89\\x88\\x84\\xb4\\x0f\\xe9`\\x04k... \n", - "4 b'8\"\\xacm\\xca\\xb2I\\xb8\\xf9MY|6\\x85\\x1dm\\xc2\\xa... \n", - "6 b'9\\xa8\\x8b\\xc3\\xe8\\xf7d\\xf3\\xcf1t\\xfb\\x9f\\xd8... \n", - "8 b'b\\x9c}\\xb0!\\x9b\\x8f\\xad|\\xfb&\\xfa\\xb3\\x80\\t\\... \n", - "... ... \n", - "5774 b'\\xbb4\\x12h\\x10\\xc8o\\xeb\\xb7.\\xfb\\xa4\\xae\\xe5... \n", - "5776 b'\\xcd\\x18\\xfb/E\\xd2\\x08B4t\\xf2a{\\xfd\\xf5\\xa5\\... \n", - "5778 b'_U\\xba\\x1d\\xe7\\x9f\\xc9\\xad?\\xcd\\x85Z\\xd3\\x04... \n", - "5988 b'L=\\x95\\x82J\\x81\\xc5A\\x05\\xbf:#\\t+\\xc0\\x80\\xd... \n", - "5990 b'X\\x05\\xddi\\xe2\\xbd\\xf2u\\x15\\x87~W\\x0c\\xb1s\\x... \n", - "\n", - "[1000 rows x 3 columns]" - ] - }, - "execution_count": 118, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import ast\n", - "\n", - "(\n", - " pred\n", - " .as_pandas_dataframe()\n", - " .rename(\n", - " columns={\n", - " \"cluster_sha1_l\": \"left_id\",\n", - " \"cluster_sha1_r\": \"right_id\",\n", - " \"match_probability\": \"probability\",\n", - " }\n", - " )\n", - " .assign(\n", - " left_id=lambda df: df.left_id.apply(ast.literal_eval),\n", - " right_id=lambda df: df.right_id.apply(ast.literal_eval),\n", - " )\n", - " .filter([\"probability\", \"left_id\", \"right_id\"])\n", - " .drop_duplicates()\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/models/splink/WL_splink-iterpred.ipynb b/notebooks/models/splink/WL_splink-iterpred.ipynb deleted file mode 100644 index 2210a69..0000000 --- a/notebooks/models/splink/WL_splink-iterpred.ipynb +++ /dev/null @@ -1,2202 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "6d2cf574-09ed-4120-9bea-8564dfb43bb1", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "c0090a19-89f2-4f87-bef1-a72f01cf6e74", - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow\n", - "import duckdb\n", - "import json\n", - "from pathlib import Path\n", - "import pandas as pd\n", - "import time\n", - "# import networkx as nx\n", - "# import networkit as nk\n", - "import sys\n", - "# import dask.dataframe as dd\n", - "\n", - "from splink.duckdb.linker import DuckDBLinker\n", - "from splink.postgres.linker import PostgresLinker\n", - "from splink.connected_components import (\n", - " _cc_create_unique_id_cols,\n", - " solve_connected_components,\n", - " _cc_create_nodes_table,\n", - " _cc_generate_neighbours_representation,\n", - " _cc_generate_initial_representatives_table,\n", - " _cc_update_neighbours_first_iter,\n", - " _cc_update_representatives_first_iter,\n", - " _cc_generate_representatives_loop_cond,\n", - " _cc_update_representatives_loop_cond\n", - ")\n", - "\n", - "from cmf.data import utils as du\n", - "import cmf.locations as loc\n", - "from cmf.config import settings\n", - "\n", - "DATA_FULL = du.build_alias_path_dict(Path(loc.DATA_SUBDIR['processed']) / 'company-matching__full')\n", - "del DATA_FULL['predictions']\n", - "PRED_PATH = Path(loc.DATA_SUBDIR['processed']) / 'company-matching__full' / 'predictions.parquet'\n", - "PRED_PATH_2 = Path(loc.DATA_SUBDIR['processed']) / 'company-matching__full' / 'predictions_2.parquet'" - ] - }, - { - "cell_type": "markdown", - "id": "fc0edb74-b974-4004-b6be-5a625b878c32", - "metadata": {}, - "source": [ - "Questions:\n", - "\n", - "1. Can we predict in batches?\n", - "2. If we predict in batches, do we get the same answer as when not?\n", - "3. Does this alleviate memory issues?\n", - "4. Does this work with clustering?\n", - "\n", - "29/6 update: didn't even need to do batch stuff. CLUSTERING is the problem -- predict is fine. Opens up new avenues.\n", - "\n", - "Let's see where clustering fails specifically.\n", - "\n", - "30/6: [This could be an option](https://github.com/moj-analytical-services/splink/discussions/1218). Predict in a glob, cluster in batches. Robin's assumptions all hold in our use case. The model is fixed because this is batching one run, records are only added because we're batching one set of predictions, and the records don't change because this is batching one run." - ] - }, - { - "cell_type": "markdown", - "id": "1ac8302c-9f46-49f0-a394-59b7ed0f6e94", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, - "source": [ - "## Repartition into multiple files" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "dadfc0a3-14e4-4c5a-85f4-2ffa8cd9ce62", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'hmrc_trade__exporters': \"'/home/jovyan/company_matching/data/processed/company-matching__partitioned/hmrc_trade__exporters'\",\n", - " 'dit_export_wins__wins_dataset': \"'/home/jovyan/company_matching/data/processed/company-matching__partitioned/dit_export_wins__wins_dataset'\",\n", - " 'dit_data_hub__companies': \"'/home/jovyan/company_matching/data/processed/company-matching__partitioned/dit_data_hub__companies'\",\n", - " 'companieshouse_companies': \"'/home/jovyan/company_matching/data/processed/company-matching__partitioned/companieshouse_companies'\"}" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data_partitioned = {}\n", - "\n", - "for data in data_full.keys():\n", - " df = dd.read_parquet(data_full[data])\n", - " df = df.repartition(partition_size=\"100MB\")\n", - " new_dir = Path(loc.DATA_SUBDIR['processed']) / 'company-matching__partitioned' / data\n", - " df.to_parquet(new_dir)\n", - " data_partitioned[data] = f\"'{new_dir.as_posix()}'\"\n", - " \n", - "data_partitioned" - ] - }, - { - "cell_type": "markdown", - "id": "d7aff7fc-ff27-4e10-b2c6-dcddb19f3880", - "metadata": {}, - "source": [ - "## Generate predictions and stash" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "959219b0-f2b3-4558-be14-3c20dd082ceb", - "metadata": {}, - "outputs": [], - "source": [ - "json_raw = mlflow.artifacts.load_text(\n", - " artifact_uri=\"runs:/22ce217706c54650ac34f59cb6a45960/model/companies_matching_model.json\"\n", - ")\n", - "json_settings = json.loads(json_raw)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "4ec62f95-1fb7-4266-a0e8-205c530bf234", - "metadata": {}, - "outputs": [], - "source": [ - "connection = duckdb.connect()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "995f791c-464a-4624-bb7f-d36cf58c7156", - "metadata": {}, - "outputs": [], - "source": [ - "linker = DuckDBLinker(\n", - " list(DATA_FULL.values()),\n", - " settings_dict=settings,\n", - " connection=':temporary:',\n", - " input_table_aliases=list(DATA_FULL.keys()),\n", - ")\n", - "linker.load_model(json_settings)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "1cf4ad10-b7ce-4adf-9dfa-919d13f0e1bd", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - " -- WARNING --\n", - "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. To produce predictions the following untrained trained parameters will use default values.\n", - "Comparison: 'comp_num_clean':\n", - " u values not fully trained\n" - ] - } - ], - "source": [ - "predictions = linker.predict(threshold_match_probability=0.9)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "19a3c751-f95b-4cbf-ad44-cc24c13e516f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────────────┬───────────────┬────────────┬──────────────┬───┬─────────────┬──────────┬──────────────┬──────────────┐\n", - "│ database_name │ database_size │ block_size │ total_blocks │ … │ free_blocks │ wal_size │ memory_usage │ memory_limit │\n", - "│ varchar │ varchar │ int64 │ int64 │ │ int64 │ varchar │ varchar │ varchar │\n", - "├───────────────┼───────────────┼────────────┼──────────────┼───┼─────────────┼──────────┼──────────────┼──────────────┤\n", - "│ memory │ 0 bytes │ 0 │ 0 │ … │ 0 │ 0 bytes │ 10.8GB │ 26.4GB │\n", - "├───────────────┴───────────────┴────────────┴──────────────┴───┴─────────────┴──────────┴──────────────┴──────────────┤\n", - "│ 1 rows 9 columns (8 shown) │\n", - "└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "connection.query(\"\"\"\n", - " pragma database_size;\n", - " call pragma_database_size();\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "ddfbca05-c1d5-421a-9be1-33bd87e6f004", - "metadata": {}, - "outputs": [], - "source": [ - "connection.query(f\"\"\"\n", - " copy {predictions.physical_name}\n", - " to '{PRED_PATH_2.as_posix()}'\n", - " (format parquet);\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "9d4aef28-0cf9-4925-ac23-3457797bfe49", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌────────────────────┬────────────────────┬───┬──────────────────────┬──────────────────────┬───────────┐\n", - "│ match_weight │ match_probability │ … │ unique_id_l │ unique_id_r │ match_key │\n", - "│ double │ double │ │ varchar │ varchar │ varchar │\n", - "├────────────────────┼────────────────────┼───┼──────────────────────┼──────────────────────┼───────────┤\n", - "│ 1.4157836817616756 │ 0.7273753031699655 │ … │ b18c76fc-a30d-e411… │ 11f864f6-09bc-4cf8… │ 0 │\n", - "│ 20.075070679022463 │ 0.9999990946819749 │ … │ 7a7fd6b2-4f0e-e411… │ 19a1c784-0e8c-4b9b… │ 0 │\n", - "│ 18.852678257686016 │ 0.9999978875938246 │ … │ e7275be8-7e11-e411… │ 3d895056-4ffd-4a15… │ 0 │\n", - "│ 21.338105084856256 │ 0.999999622783957 │ … │ 6afa126b-a911-e411… │ 03ebd8a5-f065-423a… │ 0 │\n", - "│ 18.20060156110632 │ 0.9999966805085885 │ … │ f7f4ddb5-4d12-e411… │ 600dcf7f-7087-46f3… │ 0 │\n", - "│ 22.075070679022463 │ 0.9999997736703401 │ … │ a2af958c-6d12-e411… │ 2cdace24-f936-48b6… │ 0 │\n", - "│ 21.660033179743618 │ 0.9999996982271429 │ … │ 758d33ef-7612-e411… │ a45178bf-d2b8-42bf… │ 0 │\n", - "│ 8.697196988889726 │ 0.9975965352319727 │ … │ 25d45568-4713-e411… │ 76b9acb6-9839-4b3a… │ 0 │\n", - "│ 17.267715756964858 │ 0.9999936628082473 │ … │ 2ad5730d-0718-e411… │ 33c1c23d-126a-4a48… │ 0 │\n", - "│ 1.8339107565946287 │ 0.7809429788614763 │ … │ 8ba7f427-b71b-e411… │ ec4c2222-701f-4c6f… │ 0 │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ 1.2508575379009217 │ 0.7041269888047618 │ … │ LP015585 │ SL015585 │ 0 │\n", - "│ 12.290310605733186 │ 0.9998003997147098 │ … │ CE005649 │ OE005649 │ 0 │\n", - "│ 1.2477679623468125 │ 0.7036806441393585 │ … │ 00649920 │ NI649920 │ 0 │\n", - "│ 1.8327304630679686 │ 0.7808029905433105 │ … │ NI655088 │ SC655088 │ 0 │\n", - "│ 20.660033179743618 │ 0.9999993964544678 │ … │ 03102371 │ fff1057e-6785-4f33… │ 0 │\n", - "│ 34.441450699323646 │ 0.9999999999571363 │ … │ 10595685 │ 7b3f4c93-006d-478a… │ 0 │\n", - "│ 1.2477679623468125 │ 0.7036806441393585 │ … │ NI036254 │ SL036254 │ 0 │\n", - "│ 2.4157836817616753 │ 0.8421740218644254 │ … │ SC315210 │ d6f51b21-a98c-4205… │ 0 │\n", - "│ 1.8327304630679686 │ 0.7808029905433105 │ … │ OC400419 │ SC400419 │ 0 │\n", - "│ 1.8327304630679686 │ 0.7808029905433105 │ … │ NI621269 │ SC621269 │ 0 │\n", - "├────────────────────┴────────────────────┴───┴──────────────────────┴──────────────────────┴───────────┤\n", - "│ ? rows (>9999 rows, 20 shown) 7 columns (5 shown) │\n", - "└───────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "connection.query(f\"\"\"\n", - " select *\n", - " from '{PRED_PATH.as_posix()}'\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "acafdef2-9932-48f4-a47d-9886b3a9b147", - "metadata": {}, - "outputs": [], - "source": [ - "du.data_workspace_write(\n", - " schema = \"_user_eaf4fd9a\",\n", - " table = \"lge_all_predictions\",\n", - " df = pd.read_parquet(PRED_PATH),\n", - " if_exists = \"replace\",\n", - " chunksize = int(1e6)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "581b5bc0-49f4-48f1-b3de-544c358e5f6a", - "metadata": {}, - "source": [ - "## Fix clustering" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "43db4d83-1fed-492c-9cb5-06481e66f942", - "metadata": {}, - "outputs": [], - "source": [ - "df_predict = pd.read_parquet(PRED_PATH)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "9aa4cb33-07ad-477a-b8c3-58a36dba855f", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'linker' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mlinker\u001b[49m\u001b[38;5;241m.\u001b[39mquery_sql(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mselect * from \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdb_predict\u001b[38;5;241m.\u001b[39mphysical_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mNameError\u001b[0m: name 'linker' is not defined" - ] - } - ], - "source": [ - "linker.query_sql(f\"select * from {db_predict.physical_name}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "8462e223-80f8-46f8-8684-1c2e36d6bb9d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "hmrc_trade__exporters 75127057\n", - "companieshouse_companies 1937700\n", - "dit_data_hub__companies 1019056\n", - "dit_export_wins__wins_dataset 145787\n", - "Name: source_dataset_l, dtype: int64" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "hmrc_trade__exporters 77857564\n", - "dit_export_wins__wins_dataset 176950\n", - "companieshouse_companies 103585\n", - "dit_data_hub__companies 91501\n", - "Name: source_dataset_r, dtype: int64" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_predict.source_dataset_l.value_counts()\n", - "df_predict.source_dataset_r.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "635d7473-e598-401b-ad3d-48cbc1b5e2b5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "hmrc_trade__exporters 76194847\n", - "companieshouse_companies 2155706\n", - "dit_data_hub__companies 1078847\n", - "dit_export_wins__wins_dataset 147565\n", - "Name: source_dataset_l, dtype: int64" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "hmrc_trade__exporters 79091872\n", - "companieshouse_companies 196660\n", - "dit_export_wins__wins_dataset 186607\n", - "dit_data_hub__companies 101826\n", - "Name: source_dataset_r, dtype: int64" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_predict.source_dataset_l.value_counts()\n", - "df_predict.source_dataset_r.value_counts()" - ] - }, - { - "cell_type": "markdown", - "id": "5048c3fb-5b2a-43b7-adac-fa90b88d4c17", - "metadata": {}, - "source": [ - "### NetworkX\n", - "\n", - "DuckDB is performing a graph operation on a relational database -- no wonder it's running out of memory. This approach makes sense when you've got a cluster to play with and want to keep stuff SQL first, but we don't and don't.\n", - "\n", - "Splink checks its connected components clustering using `networkx`. Let's try promoting it to our preferred method.\n", - "\n", - "A problem I can see emerging in this method is when the unique ID of one table is (possibly by chance) the same as a unique ID in another. I believe this is either quite likely, when company ID has been used, or almost impossible, when it's a UUID. But it needs checking and I haven't done it yet." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "2b83a13c-2e46-4e96-a34e-f5891fe2b57f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
match_weightmatch_probabilitysource_dataset_lsource_dataset_runique_id_lunique_id_rmatch_key
01.4157840.727375dit_data_hub__companiesdit_export_wins__wins_datasetb18c76fc-a30d-e411-8a2b-e4115bead28a11f864f6-09bc-4cf8-969e-ae790c28aec70
120.0750710.999999dit_data_hub__companiesdit_export_wins__wins_dataset7a7fd6b2-4f0e-e411-8a2b-e4115bead28a19a1c784-0e8c-4b9b-b40f-f4daa5d9bd010
218.8526780.999998dit_data_hub__companiesdit_export_wins__wins_datasete7275be8-7e11-e411-8a2b-e4115bead28a3d895056-4ffd-4a15-91d3-05e6def6e6060
\n", - "
" - ], - "text/plain": [ - " match_weight match_probability source_dataset_l \\\n", - "0 1.415784 0.727375 dit_data_hub__companies \n", - "1 20.075071 0.999999 dit_data_hub__companies \n", - "2 18.852678 0.999998 dit_data_hub__companies \n", - "\n", - " source_dataset_r unique_id_l \\\n", - "0 dit_export_wins__wins_dataset b18c76fc-a30d-e411-8a2b-e4115bead28a \n", - "1 dit_export_wins__wins_dataset 7a7fd6b2-4f0e-e411-8a2b-e4115bead28a \n", - "2 dit_export_wins__wins_dataset e7275be8-7e11-e411-8a2b-e4115bead28a \n", - "\n", - " unique_id_r match_key \n", - "0 11f864f6-09bc-4cf8-969e-ae790c28aec7 0 \n", - "1 19a1c784-0e8c-4b9b-b40f-f4daa5d9bd01 0 \n", - "2 3d895056-4ffd-4a15-91d3-05e6def6e606 0 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_predict.head(3)" - ] - }, - { - "cell_type": "markdown", - "id": "84db0b40-5fa2-4107-a5e3-31e3742b79fd", - "metadata": {}, - "source": [ - "#### First try" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "abf7ac8c-d0f9-4d5c-9c29-9b5e47b54130", - "metadata": {}, - "outputs": [], - "source": [ - "G = nx.from_pandas_edgelist(\n", - " df = df_predict.sample(1_000_000),\n", - " source = 'unique_id_l',\n", - " target = 'unique_id_r',\n", - " edge_attr = 'match_probability'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "d3713736-1f70-4b5f-a875-c09e15d6749b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
node_idrepresentative
06147b00b-0b1f-4941-ad03-8589005229ba018de358-52d9-4df8-aa53-7e9b068c7237
17cd96a12-4ac8-4a58-bd4c-5649edea565c018de358-52d9-4df8-aa53-7e9b068c7237
2018de358-52d9-4df8-aa53-7e9b068c7237018de358-52d9-4df8-aa53-7e9b068c7237
\n", - "
" - ], - "text/plain": [ - " node_id representative\n", - "0 6147b00b-0b1f-4941-ad03-8589005229ba 018de358-52d9-4df8-aa53-7e9b068c7237\n", - "1 7cd96a12-4ac8-4a58-bd4c-5649edea565c 018de358-52d9-4df8-aa53-7e9b068c7237\n", - "2 018de358-52d9-4df8-aa53-7e9b068c7237 018de358-52d9-4df8-aa53-7e9b068c7237" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rows = []\n", - "\n", - "for cluster in nx.connected_components(G):\n", - " m = min(list(cluster))\n", - " for n in cluster:\n", - " row = {\"node_id\": n, \"representative\": m}\n", - " rows.append(row)\n", - " \n", - "clusters = pd.DataFrame(rows)\n", - "\n", - "clusters.head(3)" - ] - }, - { - "cell_type": "markdown", - "id": "a8ec63e9-2a41-401b-878e-60df42259059", - "metadata": {}, - "source": [ - "#### More advanced -- add attributes" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "a92bce4d-fd18-4345-95a7-3ae042180622", - "metadata": {}, - "outputs": [], - "source": [ - "df_sample = df_predict.sample(30_000_000)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "8d74eecc-17e3-4f09-a7f2-ddfcb7f29848", - "metadata": {}, - "outputs": [], - "source": [ - "G = nx.from_pandas_edgelist(\n", - " df = df_sample,\n", - " source = 'unique_id_l',\n", - " target = 'unique_id_r',\n", - " edge_attr = 'match_probability'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "fdb41080-0566-4152-9ed6-3354710a7f1b", - "metadata": {}, - "outputs": [], - "source": [ - "def get_dataset_attributes(df):\n", - " attr_l = (\n", - " df[['unique_id_l', 'source_dataset_l']]\n", - " .rename(columns={\n", - " 'unique_id_l': 'unique_id',\n", - " 'source_dataset_l': 'source_dataset'\n", - " })\n", - " )\n", - " attr_r = (\n", - " df[['unique_id_r', 'source_dataset_r']]\n", - " .rename(columns={\n", - " 'unique_id_r': 'unique_id',\n", - " 'source_dataset_r': 'source_dataset'\n", - " })\n", - " )\n", - " attr_all = (\n", - " pd.concat([attr_r, attr_r])\n", - " .drop_duplicates()\n", - " # .groupby('unique_id')\n", - " # .agg(lambda x: x.tolist())\n", - " # .to_dict('index')\n", - " )\n", - " attr_dict = (\n", - " pd.crosstab(\n", - " attr_all.unique_id, \n", - " attr_all.source_dataset\n", - " )\n", - " .astype(bool)\n", - " .to_dict('index')\n", - " )\n", - " return attr_dict" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "eee7be61-bbfe-48fc-9b02-818bd31ec741", - "metadata": {}, - "outputs": [], - "source": [ - "attr_dict = get_dataset_attributes(df_sample)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "1d634b1d-f226-4bc4-b226-1b910ec22eaf", - "metadata": {}, - "outputs": [], - "source": [ - "nx.set_node_attributes(G, attr_dict)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "22578508-5dcf-44f7-987e-575944ff1e2d", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "nodes = (\n", - " node\n", - " for node, data\n", - " in G.nodes(data=True)\n", - " if data.get(\"dit_data_hub__companies\") or data.get(\"companieshouse_companies\")\n", - ")\n", - "subgraph = G.subgraph(nodes)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "1824d82e-a4b9-49f3-8461-d7f808599d50", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'companieshouse_companies': True,\n", - " 'dit_data_hub__companies': False,\n", - " 'dit_export_wins__wins_dataset': False,\n", - " 'hmrc_trade__exporters': False}" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dict(subgraph.nodes(data=True))['CS002474']" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "e2df2cb0-783c-4609-b245-78fbc00998cb", - "metadata": {}, - "outputs": [], - "source": [ - "cc = next(nx.connected_components(G))" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "a7dba50a-0a9a-4499-a411-4ee954ff513a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'1020227',\n", - " '1050280',\n", - " '1081920',\n", - " '1113894',\n", - " '1146012',\n", - " '115793',\n", - " '1178802',\n", - " '1211923',\n", - " '1243927',\n", - " '1275725',\n", - " '1309008',\n", - " '1341290',\n", - " '1373418',\n", - " '1404271',\n", - " '1434964',\n", - " '1466577',\n", - " '146684',\n", - " '1498805',\n", - " '1531446',\n", - " '1563788',\n", - " '1595197',\n", - " '1626761',\n", - " '1653134',\n", - " '1678259',\n", - " '1705959',\n", - " '1735677',\n", - " '1766545',\n", - " '177113',\n", - " '1798224',\n", - " '1828609',\n", - " '1859746',\n", - " '1906806',\n", - " '1952505',\n", - " '1999193',\n", - " '2050893',\n", - " '208085',\n", - " '2103799',\n", - " '21442',\n", - " '2158920',\n", - " '2213628',\n", - " '2268147',\n", - " '2321383',\n", - " '2375462',\n", - " '238521',\n", - " '2430331',\n", - " '2485096',\n", - " '2534850',\n", - " '2587205',\n", - " '2643247',\n", - " '267035',\n", - " '2698244',\n", - " '2753109',\n", - " '2807557',\n", - " '2861847',\n", - " '2915462',\n", - " '2969226',\n", - " '297623',\n", - " '3024287',\n", - " '3080072',\n", - " '3134161',\n", - " '3185286',\n", - " '3237603',\n", - " '3293200',\n", - " '329738',\n", - " '3347676',\n", - " '361251',\n", - " '393394',\n", - " '424702',\n", - " '456483',\n", - " '487547',\n", - " '518626',\n", - " '53448',\n", - " '550852',\n", - " '582506',\n", - " '612006',\n", - " '642319',\n", - " '673809',\n", - " '705008',\n", - " '736249',\n", - " '767495',\n", - " '798902',\n", - " '830441',\n", - " '84924',\n", - " '862270',\n", - " '893797',\n", - " '926169',\n", - " '958838',\n", - " '990214'}" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cc" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "d15efbb7-8622-48a3-b486-2eb92cfde335", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'018de358-52d9-4df8-aa53-7e9b068c7237',\n", - " '6147b00b-0b1f-4941-ad03-8589005229ba',\n", - " '7cd96a12-4ac8-4a58-bd4c-5649edea565c'}" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "nx.node_connected_component(G,'6147b00b-0b1f-4941-ad03-8589005229ba')" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "cf41cc76-232d-4a49-afe4-39708398ab5b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
node_idrepresentative
06147b00b-0b1f-4941-ad03-8589005229ba018de358-52d9-4df8-aa53-7e9b068c7237
17cd96a12-4ac8-4a58-bd4c-5649edea565c018de358-52d9-4df8-aa53-7e9b068c7237
2018de358-52d9-4df8-aa53-7e9b068c7237018de358-52d9-4df8-aa53-7e9b068c7237
\n", - "
" - ], - "text/plain": [ - " node_id representative\n", - "0 6147b00b-0b1f-4941-ad03-8589005229ba 018de358-52d9-4df8-aa53-7e9b068c7237\n", - "1 7cd96a12-4ac8-4a58-bd4c-5649edea565c 018de358-52d9-4df8-aa53-7e9b068c7237\n", - "2 018de358-52d9-4df8-aa53-7e9b068c7237 018de358-52d9-4df8-aa53-7e9b068c7237" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rows = []\n", - "\n", - "for cluster in nx.connected_components(G):\n", - " m = min(list(cluster))\n", - " for n in cluster:\n", - " row = {\"node_id\": n, \"representative\": m}\n", - " rows.append(row)\n", - " \n", - "clusters = pd.DataFrame(rows)\n", - "\n", - "clusters.head(3)" - ] - }, - { - "cell_type": "markdown", - "id": "a8316fee-b83a-4f93-80a0-908c8e83f410", - "metadata": {}, - "source": [ - "### Iterative clustering\n", - "\n", - "See top of file. We're going to cluster this iteratively, then combine them at the end, because [these assumptions hold](https://github.com/moj-analytical-services/splink/discussions/1218)." - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "914c9552-6f49-432e-9228-f2615c197a10", - "metadata": {}, - "outputs": [], - "source": [ - "df_predict_to_sample = df_predict" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "571df32d-4501-407d-a315-e3552732e922", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cluster 0\n", - "df_predict_to_sample shape: (79576965, 7)\n", - "Sampling complete\n", - "predict_sample shape: (100000, 7)\n", - "df_predict_to_sample shape: (79476965, 7)\n" - ] - } - ], - "source": [ - "clusters = []\n", - "\n", - "for i in range(2):\n", - " print(f\"Cluster {i}\")\n", - " print(f\"df_predict_to_sample shape: {df_predict_to_sample.shape}\")\n", - " \n", - " predict_sample = df_predict_to_sample.sample(100_000)\n", - " df_predict_to_sample = df_predict_to_sample.drop(predict_sample.index)\n", - " \n", - " print(\"Sampling complete\")\n", - " print(f\"predict_sample shape: {predict_sample.shape}\")\n", - " print(f\"df_predict_to_sample shape: {df_predict_to_sample.shape}\")\n", - " \n", - " linker = DuckDBLinker(\n", - " list(DATA_FULL.values()),\n", - " settings_dict=settings,\n", - " connection=':memory:',\n", - " input_table_aliases=list(DATA_FULL.keys()),\n", - " )\n", - " \n", - " linker.load_model(json_settings)\n", - "\n", - " db_predict = linker.register_table(\n", - " predict_sample, \n", - " \"__splink__df_predict\",\n", - " overwrite=True\n", - " )\n", - "\n", - " clusters_sample = linker.cluster_pairwise_predictions_at_threshold(\n", - " db_predict,\n", - " threshold_match_probability=0.7,\n", - " pairwise_formatting=True,\n", - " filter_pairwise_format_for_clusters=False,\n", - " )\n", - " \n", - " clusters.append(clusters_sample.as_pandas_dataframe())\n", - " \n", - "clusters" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "46a47e3f-a0b3-4044-9a49-45a67f3bb397", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
match_weightmatch_probabilitysource_dataset_lsource_dataset_runique_id_lunique_id_rmatch_keycluster_id_lcluster_id_r
01.8327300.780803companieshouse_companiescompanieshouse_companies00038751FC0387510companieshouse_companies-__-00038751companieshouse_companies-__-00038751
17.7531640.995386companieshouse_companieshmrc_trade__exporters0004369418309641companieshouse_companies-__-00043694companieshouse_companies-__-00043694
27.7372230.995335companieshouse_companieshmrc_trade__exporters0004591624108101companieshouse_companies-__-00045916companieshouse_companies-__-00045916
\n", - "
" - ], - "text/plain": [ - " match_weight match_probability source_dataset_l \\\n", - "0 1.832730 0.780803 companieshouse_companies \n", - "1 7.753164 0.995386 companieshouse_companies \n", - "2 7.737223 0.995335 companieshouse_companies \n", - "\n", - " source_dataset_r unique_id_l unique_id_r match_key \\\n", - "0 companieshouse_companies 00038751 FC038751 0 \n", - "1 hmrc_trade__exporters 00043694 1830964 1 \n", - "2 hmrc_trade__exporters 00045916 2410810 1 \n", - "\n", - " cluster_id_l cluster_id_r \n", - "0 companieshouse_companies-__-00038751 companieshouse_companies-__-00038751 \n", - "1 companieshouse_companies-__-00043694 companieshouse_companies-__-00043694 \n", - "2 companieshouse_companies-__-00045916 companieshouse_companies-__-00045916 " - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
match_weightmatch_probabilitysource_dataset_lsource_dataset_runique_id_lunique_id_rmatch_keycluster_id_lcluster_id_r
01.8327300.780803companieshouse_companiescompanieshouse_companies00038751FC0387510companieshouse_companies-__-00038751companieshouse_companies-__-00038751
17.7531640.995386companieshouse_companieshmrc_trade__exporters0004369418309641companieshouse_companies-__-00043694companieshouse_companies-__-00043694
27.7372230.995335companieshouse_companieshmrc_trade__exporters0004591624108101companieshouse_companies-__-00045916companieshouse_companies-__-00045916
\n", - "
" - ], - "text/plain": [ - " match_weight match_probability source_dataset_l \\\n", - "0 1.832730 0.780803 companieshouse_companies \n", - "1 7.753164 0.995386 companieshouse_companies \n", - "2 7.737223 0.995335 companieshouse_companies \n", - "\n", - " source_dataset_r unique_id_l unique_id_r match_key \\\n", - "0 companieshouse_companies 00038751 FC038751 0 \n", - "1 hmrc_trade__exporters 00043694 1830964 1 \n", - "2 hmrc_trade__exporters 00045916 2410810 1 \n", - "\n", - " cluster_id_l cluster_id_r \n", - "0 companieshouse_companies-__-00038751 companieshouse_companies-__-00038751 \n", - "1 companieshouse_companies-__-00043694 companieshouse_companies-__-00043694 \n", - "2 companieshouse_companies-__-00045916 companieshouse_companies-__-00045916 " - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clusters[0].head(3)\n", - "clusters[1].head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "382805eb-7947-488b-b89c-c092b53ac746", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
match_weightmatch_probabilitysource_dataset_lsource_dataset_runique_id_lunique_id_rmatch_keycluster_id_lcluster_id_r
01.8327300.780803companieshouse_companiescompanieshouse_companies00038751FC0387510companieshouse_companies-__-00038751companieshouse_companies-__-00038751
17.7531640.995386companieshouse_companieshmrc_trade__exporters0004369418309641companieshouse_companies-__-00043694companieshouse_companies-__-00043694
27.7372230.995335companieshouse_companieshmrc_trade__exporters0004591624108101companieshouse_companies-__-00045916companieshouse_companies-__-00045916
38.5170970.997278companieshouse_companieshmrc_trade__exporters0004886025055041companieshouse_companies-__-00048860companieshouse_companies-__-00048860
48.6600550.997534companieshouse_companieshmrc_trade__exporters0004937110391691companieshouse_companies-__-00049371companieshouse_companies-__-00049371
\n", - "
" - ], - "text/plain": [ - " match_weight match_probability source_dataset_l \\\n", - "0 1.832730 0.780803 companieshouse_companies \n", - "1 7.753164 0.995386 companieshouse_companies \n", - "2 7.737223 0.995335 companieshouse_companies \n", - "3 8.517097 0.997278 companieshouse_companies \n", - "4 8.660055 0.997534 companieshouse_companies \n", - "\n", - " source_dataset_r unique_id_l unique_id_r match_key \\\n", - "0 companieshouse_companies 00038751 FC038751 0 \n", - "1 hmrc_trade__exporters 00043694 1830964 1 \n", - "2 hmrc_trade__exporters 00045916 2410810 1 \n", - "3 hmrc_trade__exporters 00048860 2505504 1 \n", - "4 hmrc_trade__exporters 00049371 1039169 1 \n", - "\n", - " cluster_id_l cluster_id_r \n", - "0 companieshouse_companies-__-00038751 companieshouse_companies-__-00038751 \n", - "1 companieshouse_companies-__-00043694 companieshouse_companies-__-00043694 \n", - "2 companieshouse_companies-__-00045916 companieshouse_companies-__-00045916 \n", - "3 companieshouse_companies-__-00048860 companieshouse_companies-__-00048860 \n", - "4 companieshouse_companies-__-00049371 companieshouse_companies-__-00049371 " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
match_weightmatch_probabilitysource_dataset_lsource_dataset_runique_id_lunique_id_rmatch_keycluster_id_lcluster_id_r
01.8327300.780803companieshouse_companiescompanieshouse_companies00038751FC0387510companieshouse_companies-__-00038751companieshouse_companies-__-00038751
17.7531640.995386companieshouse_companieshmrc_trade__exporters0004369418309641companieshouse_companies-__-00043694companieshouse_companies-__-00043694
27.7372230.995335companieshouse_companieshmrc_trade__exporters0004591624108101companieshouse_companies-__-00045916companieshouse_companies-__-00045916
38.5170970.997278companieshouse_companieshmrc_trade__exporters0004886025055041companieshouse_companies-__-00048860companieshouse_companies-__-00048860
48.6600550.997534companieshouse_companieshmrc_trade__exporters0004937110391691companieshouse_companies-__-00049371companieshouse_companies-__-00049371
\n", - "
" - ], - "text/plain": [ - " match_weight match_probability source_dataset_l \\\n", - "0 1.832730 0.780803 companieshouse_companies \n", - "1 7.753164 0.995386 companieshouse_companies \n", - "2 7.737223 0.995335 companieshouse_companies \n", - "3 8.517097 0.997278 companieshouse_companies \n", - "4 8.660055 0.997534 companieshouse_companies \n", - "\n", - " source_dataset_r unique_id_l unique_id_r match_key \\\n", - "0 companieshouse_companies 00038751 FC038751 0 \n", - "1 hmrc_trade__exporters 00043694 1830964 1 \n", - "2 hmrc_trade__exporters 00045916 2410810 1 \n", - "3 hmrc_trade__exporters 00048860 2505504 1 \n", - "4 hmrc_trade__exporters 00049371 1039169 1 \n", - "\n", - " cluster_id_l cluster_id_r \n", - "0 companieshouse_companies-__-00038751 companieshouse_companies-__-00038751 \n", - "1 companieshouse_companies-__-00043694 companieshouse_companies-__-00043694 \n", - "2 companieshouse_companies-__-00045916 companieshouse_companies-__-00045916 \n", - "3 companieshouse_companies-__-00048860 companieshouse_companies-__-00048860 \n", - "4 companieshouse_companies-__-00049371 companieshouse_companies-__-00049371 " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.query_sql(f\"\"\"\n", - " select * from {clusters[0].physical_name} limit 5\n", - "\"\"\")\n", - "linker.query_sql(f\"\"\"\n", - " select * from {clusters[1].physical_name} limit 5\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "e1df4a31-49d3-4279-ab57-fc472d654690", - "metadata": {}, - "source": [ - "### Smaller dataset test\n", - "\n", - "Trying this at .9 instead of .7. Have loaded `PRED_PATH_2` for the below." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "69c5af24-b31a-4874-902c-d79215ceba7b", - "metadata": {}, - "outputs": [], - "source": [ - "db_predict = linker.register_table(\n", - " df_predict, \n", - " \"__splink__df_predict\",\n", - " overwrite=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4588c04a-085a-4167-844e-d77683489edb", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Completed iteration 1, root rows count 7186\n", - "Completed iteration 2, root rows count 103\n", - "Completed iteration 3, root rows count 177\n", - "Completed iteration 4, root rows count 2\n", - "Completed iteration 5, root rows count 6\n", - "Completed iteration 6, root rows count 1\n", - "Completed iteration 7, root rows count 0\n" - ] - } - ], - "source": [ - "clusters = linker.cluster_pairwise_predictions_at_threshold(\n", - " db_predict,\n", - " threshold_match_probability=0.9,\n", - " pairwise_formatting=True,\n", - " filter_pairwise_format_for_clusters=False,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "fde040e1-5809-429a-aff3-515ce5a8d38d", - "metadata": {}, - "source": [ - "Crash crash crash crash crash" - ] - }, - { - "cell_type": "markdown", - "id": "b19fddc1-3651-47d2-90ab-3cb7f0810a79", - "metadata": {}, - "source": [ - "### Clustering fail diagnosis\n", - "\n", - "We're stepping through `linker.cluster_pairwise_predictions_at_threshold` to see what crashes the kernel. [Source](https://github.com/moj-analytical-services/splink/blob/56833b6fe6692de72530083f51dfdbad29c0fd33/splink/linker.py#L1953)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "69407d6a-9e0c-4ec8-88cf-991aaf0d2527", - "metadata": {}, - "outputs": [], - "source": [ - "# linker.cluster_pairwise_predictions_at_threshold(\n", - "# f\"'{pred_path.as_posix()}'\",\n", - "# threshold_match_probability=0.7,\n", - "# pairwise_formatting=True,\n", - "# filter_pairwise_format_for_clusters=False,\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "dc18a9ed-5f37-4a42-88de-4862c26b63d7", - "metadata": {}, - "outputs": [], - "source": [ - "concat_with_tf = linker._initialise_df_concat_with_tf(predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "1e79e678-8aed-4425-8fec-00ef067eb6f4", - "metadata": {}, - "outputs": [], - "source": [ - "edges_table = _cc_create_unique_id_cols(\n", - " linker,\n", - " concat_with_tf.physical_name,\n", - " predictions.physical_name,\n", - " 0.7,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "a5e2160f-4c92-4a6d-a2eb-deb0f3811fd4", - "metadata": {}, - "source": [ - "Crashed in the connected components function. Let's break it down." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "66aa83f9-7d39-4eda-88f9-b3634335e9f7", - "metadata": {}, - "outputs": [], - "source": [ - "# cc = solve_connected_components(\n", - "# linker,\n", - "# edges_table,\n", - "# predictions,\n", - "# concat_with_tf,\n", - "# pairwise_output = True,\n", - "# filter_pairwise_format_for_clusters = False,\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "id": "f09da443-d7e8-4ec6-af84-efa1a2526912", - "metadata": {}, - "source": [ - "This is within `solve_connected_components`." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "004e0579-5f26-46e9-aced-2c49f4ed56ca", - "metadata": {}, - "outputs": [], - "source": [ - "input_dfs = [edges_table]\n", - "input_dfs.append(concat_with_tf)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "9e31f4e8-8aa7-4ca3-a348-24e96efe325e", - "metadata": {}, - "outputs": [], - "source": [ - "# Create our initial node and neighbours tables\n", - "sql = _cc_create_nodes_table(linker, False)\n", - "linker._enqueue_sql(sql, \"nodes\")\n", - "sql = _cc_generate_neighbours_representation()\n", - "linker._enqueue_sql(sql, \"__splink__df_neighbours\")\n", - "neighbours = linker._execute_sql_pipeline(input_dfs)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "8f34bd92-2639-4634-bdb5-e8b5e3faa8dd", - "metadata": {}, - "outputs": [], - "source": [ - "# # Create our initial representatives table\n", - "# sql = _cc_generate_initial_representatives_table()\n", - "# linker._enqueue_sql(sql, \"representatives\")\n", - "# sql = _cc_update_neighbours_first_iter()\n", - "# linker._enqueue_sql(sql, \"neighbours_first_iter\")\n", - "# sql = _cc_update_representatives_first_iter()\n", - "# # Execute if we have no batching, otherwise add it to our batched process\n", - "# linker._enqueue_sql(sql, \"__splink__df_representatives\")" - ] - }, - { - "cell_type": "markdown", - "id": "410795c3-6989-43f8-9ca8-ef7f4f829450", - "metadata": {}, - "source": [ - "And here's our crash. Let's try running it sequentially, which I hope will work?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f9065c1-9c13-4651-a56a-1e5f42463e31", - "metadata": {}, - "outputs": [], - "source": [ - "# representatives = linker._execute_sql_pipeline([neighbours])" - ] - }, - { - "cell_type": "markdown", - "id": "f4256ee7-61b0-4524-b4d2-cf075742db38", - "metadata": {}, - "source": [ - "Let's try." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "c8f1242e-5cd6-4e80-9864-afcbfed4bc78", - "metadata": {}, - "outputs": [], - "source": [ - "sql = _cc_generate_initial_representatives_table()\n", - "linker._enqueue_sql(sql, \"representatives\")\n", - "step_1 = linker._execute_sql_pipeline([neighbours])" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "392b3844-f610-4a8c-bd7e-c3d0bcdf0b90", - "metadata": {}, - "outputs": [], - "source": [ - "sql = _cc_update_neighbours_first_iter()\n", - "linker._enqueue_sql(sql, \"neighbours_first_iter\")\n", - "step_2 = linker._execute_sql_pipeline([step_1, neighbours])" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "9851c4d3-a736-4d23-a949-7c6f3d0e2177", - "metadata": {}, - "outputs": [], - "source": [ - "sql = _cc_update_representatives_first_iter()\n", - "# Execute if we have no batching, otherwise add it to our batched process\n", - "linker._enqueue_sql(sql, \"__splink__df_representatives\")\n", - "representatives = linker._execute_sql_pipeline([step_2, step_1, neighbours])" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "47d4b8f0-4815-4a1a-b3c5-37d2e66d8a3e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
node_idrepresentativerep_match
0companieshouse_companies-__-07258900companieshouse_companies-__-07258900False
1companieshouse_companies-__-07259140companieshouse_companies-__-07259140False
2companieshouse_companies-__-07252840companieshouse_companies-__-07252840False
3companieshouse_companies-__-07253048companieshouse_companies-__-07253048False
4companieshouse_companies-__-07258011companieshouse_companies-__-07258011False
\n", - "
" - ], - "text/plain": [ - " node_id representative \\\n", - "0 companieshouse_companies-__-07258900 companieshouse_companies-__-07258900 \n", - "1 companieshouse_companies-__-07259140 companieshouse_companies-__-07259140 \n", - "2 companieshouse_companies-__-07252840 companieshouse_companies-__-07252840 \n", - "3 companieshouse_companies-__-07253048 companieshouse_companies-__-07253048 \n", - "4 companieshouse_companies-__-07258011 companieshouse_companies-__-07258011 \n", - "\n", - " rep_match \n", - "0 False \n", - "1 False \n", - "2 False \n", - "3 False \n", - "4 False " - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.query_sql(f\"\"\"\n", - " select * from {representatives.physical_name} limit 5\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1bced313-716e-42bb-b046-9a1f12d05488", - "metadata": {}, - "outputs": [], - "source": [ - "sql = _cc_generate_initial_representatives_table()\n", - "linker._enqueue_sql(sql, \"representatives\")\n", - "sql = _cc_update_neighbours_first_iter()\n", - "linker._enqueue_sql(sql, \"neighbours_first_iter\")\n", - "step_2 = linker._execute_sql_pipeline([step_1, neighbours])" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "08b17a84-ef2e-4de4-8e2b-9cfb7f895337", - "metadata": {}, - "outputs": [], - "source": [ - "prev_representatives_table = representatives" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "0eeef805-a092-49d5-81d2-442a68b83fc5", - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'DuckDBLinkerDataFrame' object has no attribute 'drop_table_from_database_and_remove_from_cache'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[35], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mrepresentatives\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop_table_from_database_and_remove_from_cache\u001b[49m()\n", - "\u001b[0;31mAttributeError\u001b[0m: 'DuckDBLinkerDataFrame' object has no attribute 'drop_table_from_database_and_remove_from_cache'" - ] - } - ], - "source": [ - "representatives.drop_table_from_database_and_remove_from_cache()" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "558f5d0c-7e3c-4b94-b833-da2ec41cf91d", - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'DuckDBLinkerDataFrame' object has no attribute 'drop_table_from_database_and_remove_from_cache'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[32], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mprev_representatives_table\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop_table_from_database_and_remove_from_cache\u001b[49m()\n", - "\u001b[0;31mAttributeError\u001b[0m: 'DuckDBLinkerDataFrame' object has no attribute 'drop_table_from_database_and_remove_from_cache'" - ] - } - ], - "source": [ - "prev_representatives_table.drop_table_from_database_and_remove_from_cache()" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "0b4da6f9-7d8b-4b28-ac54-fd62ec30dc6e", - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'DuckDBLinkerDataFrame' object has no attribute 'drop_table_from_database_and_remove_from_cache'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[28], line 34\u001b[0m\n\u001b[1;32m 32\u001b[0m representatives \u001b[38;5;241m=\u001b[39m linker\u001b[38;5;241m.\u001b[39m_execute_sql_pipeline([neighbours])\n\u001b[1;32m 33\u001b[0m \u001b[38;5;66;03m# Update table reference\u001b[39;00m\n\u001b[0;32m---> 34\u001b[0m \u001b[43mprev_representatives_table\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop_table_from_database_and_remove_from_cache\u001b[49m()\n\u001b[1;32m 35\u001b[0m prev_representatives_table \u001b[38;5;241m=\u001b[39m representatives\n\u001b[1;32m 37\u001b[0m \u001b[38;5;66;03m# Check if our exit condition has been met...\u001b[39;00m\n", - "\u001b[0;31mAttributeError\u001b[0m: 'DuckDBLinkerDataFrame' object has no attribute 'drop_table_from_database_and_remove_from_cache'" - ] - } - ], - "source": [ - "# Loop while our representative table still has unsettled nodes\n", - "iteration, root_rows = 0, 1\n", - "while root_rows > 0:\n", - " start_time = time.time()\n", - " iteration += 1\n", - "\n", - " # Loop summary:\n", - "\n", - " # 1. Update our neighbours table.\n", - " # 2. Join on the representatives table from the previous iteration\n", - " # to create the \"rep_match\" column.\n", - " # 3. Assess if our exit condition has been met.\n", - "\n", - " # Generates our representatives table for the next iteration\n", - " # by joining our previous tables onto our neighbours table.\n", - " sql = _cc_generate_representatives_loop_cond(\n", - " prev_representatives_table.physical_name,\n", - " )\n", - " linker._enqueue_sql(sql, \"r\")\n", - " # Update our rep_match column in the representatives table.\n", - " sql = _cc_update_representatives_loop_cond(\n", - " prev_representatives_table.physical_name\n", - " )\n", - "\n", - " repr_name = f\"__splink__df_representatives_{iteration}\"\n", - "\n", - " representatives = linker._enqueue_sql(\n", - " sql,\n", - " repr_name,\n", - " )\n", - "\n", - " representatives = linker._execute_sql_pipeline([neighbours])\n", - " # Update table reference\n", - " prev_representatives_table.drop_table_from_database_and_remove_from_cache()\n", - " prev_representatives_table = representatives\n", - "\n", - " # Check if our exit condition has been met...\n", - " sql = _cc_assess_exit_condition(representatives.physical_name)\n", - "\n", - " linker._enqueue_sql(sql, \"__splink__df_root_rows\")\n", - "\n", - " root_rows_df = linker._execute_sql_pipeline(use_cache=False)\n", - "\n", - " root_rows = root_rows_df.as_record_dict()\n", - " root_rows_df.drop_table_from_database_and_remove_from_cache()\n", - " root_rows = root_rows[0][\"count\"]\n", - " logger.info(f\"Completed iteration {iteration}, root rows count {root_rows}\")\n", - " end_time = time.time()\n", - " logger.log(15, f\" Iteration time: {end_time - start_time} seconds\")" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "35b08e73-5f4e-4a01-a385-5ffeda540a5d", - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'DuckDBLinker' object has no attribute 'drop_table_from_database_and_remove_from_cache'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[29], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mlinker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop_table_from_database_and_remove_from_cache\u001b[49m\n", - "\u001b[0;31mAttributeError\u001b[0m: 'DuckDBLinker' object has no attribute 'drop_table_from_database_and_remove_from_cache'" - ] - } - ], - "source": [ - "linker.drop_table_from_database_and_remove_from_cache" - ] - }, - { - "cell_type": "markdown", - "id": "07c8fa16-ddfd-49b8-bdb8-911dfe5172a1", - "metadata": {}, - "source": [ - "### Postgres fails" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "63b4529c-e616-46e2-9a9c-892a0089f87f", - "metadata": {}, - "outputs": [], - "source": [ - "pg_con = du.sql_engine.connect()" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "39c550f7-fc2b-4624-9507-91ef0ec93a17", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/postgres/linker.py:135: RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to \"sqlalchemy<2.0\". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n", - " res = con.execute(text(final_sql))\n" - ] - }, - { - "ename": "ProgrammingError", - "evalue": "(psycopg2.errors.InvalidFunctionDefinition) return type mismatch in function declared to return double precision\nDETAIL: Actual return type is numeric.\nCONTEXT: SQL function \"ave_months_between\"\n\n[SQL: \n CREATE OR REPLACE FUNCTION ave_months_between(x date, y date)\n RETURNS float8 AS $$\n SELECT datediff(x, y)/30.4375;\n $$ LANGUAGE SQL IMMUTABLE;\n ]\n(Background on this error at: https://sqlalche.me/e/14/f405)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mInvalidFunctionDefinition\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1905\u001b[0m, in \u001b[0;36mConnection._execute_context\u001b[0;34m(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)\u001b[0m\n\u001b[1;32m 1904\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m evt_handled:\n\u001b[0;32m-> 1905\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdialect\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdo_execute\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1906\u001b[0m \u001b[43m \u001b[49m\u001b[43mcursor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstatement\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparameters\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcontext\u001b[49m\n\u001b[1;32m 1907\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1909\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_has_events \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine\u001b[38;5;241m.\u001b[39m_has_events:\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:736\u001b[0m, in \u001b[0;36mDefaultDialect.do_execute\u001b[0;34m(self, cursor, statement, parameters, context)\u001b[0m\n\u001b[1;32m 735\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdo_execute\u001b[39m(\u001b[38;5;28mself\u001b[39m, cursor, statement, parameters, context\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 736\u001b[0m \u001b[43mcursor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstatement\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparameters\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mInvalidFunctionDefinition\u001b[0m: return type mismatch in function declared to return double precision\nDETAIL: Actual return type is numeric.\nCONTEXT: SQL function \"ave_months_between\"\n", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mProgrammingError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[15], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m pg_linker \u001b[38;5;241m=\u001b[39m \u001b[43mPostgresLinker\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_table_or_tables\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdu\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_dummy_df\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdu\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql_engine\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m pg_linker\u001b[38;5;241m.\u001b[39mload_model(json_settings)\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/postgres/linker.py:104\u001b[0m, in \u001b[0;36mPostgresLinker.__init__\u001b[0;34m(self, input_table_or_tables, settings_dict, engine, set_up_basic_logging, input_table_aliases, schema)\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_db_schema \u001b[38;5;241m=\u001b[39m schema\n\u001b[1;32m 103\u001b[0m \u001b[38;5;66;03m# Create custom SQL functions in database\u001b[39;00m\n\u001b[0;32m--> 104\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_register_custom_functions\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_register_extensions()\n\u001b[1;32m 107\u001b[0m \u001b[38;5;66;03m# Create splink schema\u001b[39;00m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/postgres/linker.py:285\u001b[0m, in \u001b[0;36mPostgresLinker._register_custom_functions\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[38;5;66;03m# need for datediff levels\u001b[39;00m\n\u001b[1;32m 284\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_datediff_function()\n\u001b[0;32m--> 285\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_create_months_between_function\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 286\u001b[0m \u001b[38;5;66;03m# need for array_intersect levels\u001b[39;00m\n\u001b[1;32m 287\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_array_intersect_function()\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/postgres/linker.py:257\u001b[0m, in \u001b[0;36mPostgresLinker._create_months_between_function\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 250\u001b[0m ave_length_month \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m365.25\u001b[39m \u001b[38;5;241m/\u001b[39m \u001b[38;5;241m12\u001b[39m\n\u001b[1;32m 251\u001b[0m sql \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 252\u001b[0m \u001b[38;5;124mCREATE OR REPLACE FUNCTION ave_months_between(x date, y date)\u001b[39m\n\u001b[1;32m 253\u001b[0m \u001b[38;5;124mRETURNS float8 AS $$\u001b[39m\n\u001b[1;32m 254\u001b[0m \u001b[38;5;124mSELECT datediff(x, y)/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mave_length_month\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m;\u001b[39m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;124m$$ LANGUAGE SQL IMMUTABLE;\u001b[39m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;124m\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[0;32m--> 257\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_sql_execution\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 259\u001b[0m sql_cast \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 260\u001b[0m \u001b[38;5;124mCREATE OR REPLACE FUNCTION ave_months_between(\u001b[39m\n\u001b[1;32m 261\u001b[0m \u001b[38;5;124m x \u001b[39m\u001b[38;5;132;01m{dateish_type}\u001b[39;00m\u001b[38;5;124m, y \u001b[39m\u001b[38;5;132;01m{dateish_type}\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[38;5;124m$$ LANGUAGE SQL IMMUTABLE;\u001b[39m\n\u001b[1;32m 266\u001b[0m \u001b[38;5;124m\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 267\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m dateish_type \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimestamp with time zone\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/postgres/linker.py:135\u001b[0m, in \u001b[0;36mPostgresLinker._run_sql_execution\u001b[0;34m(self, final_sql, templated_name, physical_name)\u001b[0m\n\u001b[1;32m 131\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_run_sql_execution\u001b[39m(\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28mself\u001b[39m, final_sql: \u001b[38;5;28mstr\u001b[39m, templated_name: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, physical_name: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 133\u001b[0m ):\n\u001b[1;32m 134\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mconnect() \u001b[38;5;28;01mas\u001b[39;00m con:\n\u001b[0;32m--> 135\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mcon\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal_sql\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1385\u001b[0m, in \u001b[0;36mConnection.execute\u001b[0;34m(self, statement, *multiparams, **params)\u001b[0m\n\u001b[1;32m 1381\u001b[0m util\u001b[38;5;241m.\u001b[39mraise_(\n\u001b[1;32m 1382\u001b[0m exc\u001b[38;5;241m.\u001b[39mObjectNotExecutableError(statement), replace_context\u001b[38;5;241m=\u001b[39merr\n\u001b[1;32m 1383\u001b[0m )\n\u001b[1;32m 1384\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1385\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmeth\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmultiparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_EMPTY_EXECUTION_OPTS\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:334\u001b[0m, in \u001b[0;36mClauseElement._execute_on_connection\u001b[0;34m(self, connection, multiparams, params, execution_options, _force)\u001b[0m\n\u001b[1;32m 330\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_execute_on_connection\u001b[39m(\n\u001b[1;32m 331\u001b[0m \u001b[38;5;28mself\u001b[39m, connection, multiparams, params, execution_options, _force\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 332\u001b[0m ):\n\u001b[1;32m 333\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _force \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msupports_execution:\n\u001b[0;32m--> 334\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconnection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execute_clauseelement\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 335\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmultiparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexecution_options\u001b[49m\n\u001b[1;32m 336\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 337\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 338\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc\u001b[38;5;241m.\u001b[39mObjectNotExecutableError(\u001b[38;5;28mself\u001b[39m)\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1577\u001b[0m, in \u001b[0;36mConnection._execute_clauseelement\u001b[0;34m(self, elem, multiparams, params, execution_options)\u001b[0m\n\u001b[1;32m 1565\u001b[0m compiled_cache \u001b[38;5;241m=\u001b[39m execution_options\u001b[38;5;241m.\u001b[39mget(\n\u001b[1;32m 1566\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompiled_cache\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine\u001b[38;5;241m.\u001b[39m_compiled_cache\n\u001b[1;32m 1567\u001b[0m )\n\u001b[1;32m 1569\u001b[0m compiled_sql, extracted_params, cache_hit \u001b[38;5;241m=\u001b[39m elem\u001b[38;5;241m.\u001b[39m_compile_w_cache(\n\u001b[1;32m 1570\u001b[0m dialect\u001b[38;5;241m=\u001b[39mdialect,\n\u001b[1;32m 1571\u001b[0m compiled_cache\u001b[38;5;241m=\u001b[39mcompiled_cache,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1575\u001b[0m linting\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdialect\u001b[38;5;241m.\u001b[39mcompiler_linting \u001b[38;5;241m|\u001b[39m compiler\u001b[38;5;241m.\u001b[39mWARN_LINTING,\n\u001b[1;32m 1576\u001b[0m )\n\u001b[0;32m-> 1577\u001b[0m ret \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execute_context\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1578\u001b[0m \u001b[43m \u001b[49m\u001b[43mdialect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1579\u001b[0m \u001b[43m \u001b[49m\u001b[43mdialect\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecution_ctx_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_init_compiled\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1580\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompiled_sql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1581\u001b[0m \u001b[43m \u001b[49m\u001b[43mdistilled_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1582\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecution_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1583\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompiled_sql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1584\u001b[0m \u001b[43m \u001b[49m\u001b[43mdistilled_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1585\u001b[0m \u001b[43m \u001b[49m\u001b[43melem\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1586\u001b[0m \u001b[43m \u001b[49m\u001b[43mextracted_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1587\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_hit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_hit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1588\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1589\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_events:\n\u001b[1;32m 1590\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdispatch\u001b[38;5;241m.\u001b[39mafter_execute(\n\u001b[1;32m 1591\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1592\u001b[0m elem,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1596\u001b[0m ret,\n\u001b[1;32m 1597\u001b[0m )\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1948\u001b[0m, in \u001b[0;36mConnection._execute_context\u001b[0;34m(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)\u001b[0m\n\u001b[1;32m 1945\u001b[0m branched\u001b[38;5;241m.\u001b[39mclose()\n\u001b[1;32m 1947\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m-> 1948\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_handle_dbapi_exception\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1949\u001b[0m \u001b[43m \u001b[49m\u001b[43me\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstatement\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparameters\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcursor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcontext\u001b[49m\n\u001b[1;32m 1950\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1952\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2129\u001b[0m, in \u001b[0;36mConnection._handle_dbapi_exception\u001b[0;34m(self, e, statement, parameters, cursor, context)\u001b[0m\n\u001b[1;32m 2127\u001b[0m util\u001b[38;5;241m.\u001b[39mraise_(newraise, with_traceback\u001b[38;5;241m=\u001b[39mexc_info[\u001b[38;5;241m2\u001b[39m], from_\u001b[38;5;241m=\u001b[39me)\n\u001b[1;32m 2128\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m should_wrap:\n\u001b[0;32m-> 2129\u001b[0m \u001b[43mutil\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2130\u001b[0m \u001b[43m \u001b[49m\u001b[43msqlalchemy_exception\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwith_traceback\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexc_info\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrom_\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43me\u001b[49m\n\u001b[1;32m 2131\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2132\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2133\u001b[0m util\u001b[38;5;241m.\u001b[39mraise_(exc_info[\u001b[38;5;241m1\u001b[39m], with_traceback\u001b[38;5;241m=\u001b[39mexc_info[\u001b[38;5;241m2\u001b[39m])\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/util/compat.py:211\u001b[0m, in \u001b[0;36mraise_\u001b[0;34m(***failed resolving arguments***)\u001b[0m\n\u001b[1;32m 208\u001b[0m exception\u001b[38;5;241m.\u001b[39m__cause__ \u001b[38;5;241m=\u001b[39m replace_context\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception\n\u001b[1;32m 212\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 213\u001b[0m \u001b[38;5;66;03m# credit to\u001b[39;00m\n\u001b[1;32m 214\u001b[0m \u001b[38;5;66;03m# https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/\u001b[39;00m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;66;03m# as the __traceback__ object creates a cycle\u001b[39;00m\n\u001b[1;32m 216\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m exception, replace_context, from_, with_traceback\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1905\u001b[0m, in \u001b[0;36mConnection._execute_context\u001b[0;34m(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)\u001b[0m\n\u001b[1;32m 1903\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 1904\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m evt_handled:\n\u001b[0;32m-> 1905\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdialect\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdo_execute\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1906\u001b[0m \u001b[43m \u001b[49m\u001b[43mcursor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstatement\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparameters\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcontext\u001b[49m\n\u001b[1;32m 1907\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1909\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_has_events \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine\u001b[38;5;241m.\u001b[39m_has_events:\n\u001b[1;32m 1910\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdispatch\u001b[38;5;241m.\u001b[39mafter_cursor_execute(\n\u001b[1;32m 1911\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1912\u001b[0m cursor,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1916\u001b[0m context\u001b[38;5;241m.\u001b[39mexecutemany,\n\u001b[1;32m 1917\u001b[0m )\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/sqlalchemy/engine/default.py:736\u001b[0m, in \u001b[0;36mDefaultDialect.do_execute\u001b[0;34m(self, cursor, statement, parameters, context)\u001b[0m\n\u001b[1;32m 735\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdo_execute\u001b[39m(\u001b[38;5;28mself\u001b[39m, cursor, statement, parameters, context\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 736\u001b[0m \u001b[43mcursor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstatement\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparameters\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mProgrammingError\u001b[0m: (psycopg2.errors.InvalidFunctionDefinition) return type mismatch in function declared to return double precision\nDETAIL: Actual return type is numeric.\nCONTEXT: SQL function \"ave_months_between\"\n\n[SQL: \n CREATE OR REPLACE FUNCTION ave_months_between(x date, y date)\n RETURNS float8 AS $$\n SELECT datediff(x, y)/30.4375;\n $$ LANGUAGE SQL IMMUTABLE;\n ]\n(Background on this error at: https://sqlalche.me/e/14/f405)" - ] - } - ], - "source": [ - "pg_linker = PostgresLinker(\n", - " input_table_or_tables=du.generate_dummy_df(),\n", - " engine=du.sql_engine,\n", - ")\n", - "pg_linker.load_model(json_settings)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "de5f59bc-6484-42bf-a55c-39f27022db98", - "metadata": {}, - "outputs": [], - "source": [ - "df_clusters = pg_linker.cluster_pairwise_predictions_at_threshold(\n", - " df_predict, \n", - " 0.7\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "06ffe9d4-55be-4161-aff3-23976b0c33ed", - "metadata": {}, - "outputs": [ - { - "ename": "type", - "evalue": "'str' object has no attribute 'physical_name'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[20], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m clusters \u001b[38;5;241m=\u001b[39m \u001b[43mlinker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcluster_pairwise_predictions_at_threshold\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mpred_path\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mas_posix\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mthreshold_match_probability\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.7\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mpairwise_formatting\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilter_pairwise_format_for_clusters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/linker.py:2010\u001b[0m, in \u001b[0;36mLinker.cluster_pairwise_predictions_at_threshold\u001b[0;34m(self, df_predict, threshold_match_probability, pairwise_formatting, filter_pairwise_format_for_clusters)\u001b[0m\n\u001b[1;32m 2004\u001b[0m \u001b[38;5;66;03m# Feeding in df_predict forces materiailisation, if it exists in your database\u001b[39;00m\n\u001b[1;32m 2005\u001b[0m concat_with_tf \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_initialise_df_concat_with_tf(df_predict)\n\u001b[1;32m 2007\u001b[0m edges_table \u001b[38;5;241m=\u001b[39m _cc_create_unique_id_cols(\n\u001b[1;32m 2008\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 2009\u001b[0m concat_with_tf\u001b[38;5;241m.\u001b[39mphysical_name,\n\u001b[0;32m-> 2010\u001b[0m \u001b[43mdf_predict\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mphysical_name\u001b[49m,\n\u001b[1;32m 2011\u001b[0m threshold_match_probability,\n\u001b[1;32m 2012\u001b[0m )\n\u001b[1;32m 2014\u001b[0m cc \u001b[38;5;241m=\u001b[39m solve_connected_components(\n\u001b[1;32m 2015\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 2016\u001b[0m edges_table,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 2020\u001b[0m filter_pairwise_format_for_clusters,\n\u001b[1;32m 2021\u001b[0m )\n\u001b[1;32m 2023\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cc\n", - "\u001b[0;31mAttributeError\u001b[0m: 'str' object has no attribute 'physical_name'" - ] - } - ], - "source": [ - "clusters = linker.cluster_pairwise_predictions_at_threshold(\n", - " f\"'{pred_path.as_posix()}'\",\n", - " threshold_match_probability=0.7,\n", - " pairwise_formatting=True,\n", - " filter_pairwise_format_for_clusters=False,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "95e3c12a-f07c-4e7a-a390-faf10bec8f04", - "metadata": {}, - "outputs": [], - "source": [ - "lookup = linker.query_sql(\n", - " f\"\"\"\n", - " select\n", - " source_dataset_l as source,\n", - " unique_id_l as source_id,\n", - " cluster_id_l as source_cluster,\n", - " source_dataset_r as target,\n", - " unique_id_r as target_id,\n", - " cluster_id_r as target_cluster,\n", - " match_probability\n", - " from\n", - " { clusters.physical_name }\n", - " union\n", - " select\n", - " source_dataset_r as source,\n", - " unique_id_r as source_id,\n", - " cluster_id_r as source_cluster,\n", - " source_dataset_l as target,\n", - " unique_id_l as target_id,\n", - " cluster_id_l as target_cluster,\n", - " match_probability\n", - " from\n", - " { clusters.physical_name }\n", - " \"\"\",\n", - " output_type=\"splink_df\",\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/models/splink/WL_splink-physical.ipynb b/notebooks/models/splink/WL_splink-physical.ipynb deleted file mode 100644 index b14c872..0000000 --- a/notebooks/models/splink/WL_splink-physical.ipynb +++ /dev/null @@ -1,705 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "id": "41289fef-1d16-4d33-8ee4-a6e120f06cb6", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "65b6cebf-85bb-49c9-9120-8f5b13cfbe2e", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "import pyarrow.dataset as ds\n", - "from pgpq import ArrowToPostgresBinaryEncoder\n", - "import psycopg\n", - "from tqdm import tqdm\n", - "\n", - "from splink.duckdb.linker import DuckDBLinker\n", - "\n", - "from cmf.data import utils as du\n", - "import cmf.locations as loc\n", - "from cmf.config import settings\n", - "\n", - "CLUSTER_PATH = Path(loc.DATA_SUBDIR['processed']) / 'company-matching__full' / 'clusters.parquet' " - ] - }, - { - "cell_type": "markdown", - "id": "d0dfbcb7-f88b-4412-949c-3f59ddf13685", - "metadata": { - "tags": [] - }, - "source": [ - "# Using Splink with physical duckdb\n", - "\n", - "Gonna try and run it off the file system. Raw db about 1GB pre-Splink." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "b9e42069-7719-467d-8dd2-5a66cac6be67", - "metadata": {}, - "outputs": [], - "source": [ - "con = du.get_duckdb_connection()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "af87a859-1957-42b0-a49b-97a148d66ec8", - "metadata": {}, - "outputs": [], - "source": [ - "table_name = []\n", - "table_alias = []\n", - "\n", - "for i in con.query(\"select * from table_alias_lookup;\").fetchall():\n", - " table_alias.append(i[0])\n", - " table_name.append(i[1])" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "2def9a4f-16d5-49eb-80b6-6c3d3482f6b3", - "metadata": {}, - "outputs": [], - "source": [ - "linker = DuckDBLinker(\n", - " table_name,\n", - " settings_dict=settings,\n", - " connection=con,\n", - " input_table_aliases=table_alias,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "f515f37c-9cca-4a1c-988f-95b539cb182b", - "metadata": {}, - "source": [ - "## Train" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "5107e63b-09a3-492a-b1e1-e6a3be30abc5", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Probability two random records match is estimated to be 3.24e-06.\n", - "This means that amongst all possible pairwise record comparisons, one in 309,025.51 are expected to match. With 40,009,433,095,801 total possible comparisons, we expect a total of around 129,469,675.71 matching pairs\n" - ] - } - ], - "source": [ - "linker.estimate_probability_two_random_records_match(\n", - " \"l.name_unusual_tokens = r.name_unusual_tokens\",\n", - " recall=0.7,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "1c00d60a-7a09-41b2-9957-5faf36053675", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "----- Estimating u probabilities using random sampling -----\n", - "u probability not trained for comp_num_clean - Exact match (comparison vector value: 2). This usually means the comparison level was never observed in the training data.\n", - "\n", - "Estimated u probabilities using random sampling\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - comp_num_clean (some u values are not trained, no m values are trained).\n", - " - name_unusual_tokens (no m values are trained).\n", - " - postcode (no m values are trained).\n" - ] - } - ], - "source": [ - "linker.estimate_u_using_random_sampling(max_pairs=1e7)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "9bf50a59-457f-4219-8c10-2671779a3944", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "---- Estimating m probabilities using from column comp_num_clean -----\n", - "m probability not trained for comp_num_clean - Jaro_winkler_similarity >= 0.75 (comparison vector value: 1). This usually means the comparison level was never observed in the training data.\n", - "m probability not trained for comp_num_clean - All other comparisons (comparison vector value: 0). This usually means the comparison level was never observed in the training data.\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - comp_num_clean (some u values are not trained, some m values are not trained).\n", - "\n", - "----- Starting EM training session -----\n", - "\n", - "Estimating the m probabilities of the model by blocking on:\n", - "\n", - " l.name_unusual_tokens = r.name_unusual_tokens\n", - " and l.postcode_area = r.postcode_area\n", - "\n", - "\n", - "Parameter estimates will be made for the following comparison(s):\n", - " - comp_num_clean\n", - " - postcode\n", - "\n", - "Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: \n", - " - name_unusual_tokens\n", - "\n", - "\n", - "WARNING:\n", - "Level All other comparisons on comparison postcode not observed in dataset, unable to train m value\n", - "Iteration 1: Largest change in params was 0.669 in the m_probability of postcode, level `Exact match postcode`\n", - "\n", - "WARNING:\n", - "Level All other comparisons on comparison postcode not observed in dataset, unable to train m value\n", - "Iteration 2: Largest change in params was 0.0589 in the m_probability of comp_num_clean, level `All other comparisons`\n", - "\n", - "WARNING:\n", - "Level All other comparisons on comparison postcode not observed in dataset, unable to train m value\n", - "Iteration 3: Largest change in params was 0.00527 in the m_probability of comp_num_clean, level `All other comparisons`\n", - "\n", - "WARNING:\n", - "Level All other comparisons on comparison postcode not observed in dataset, unable to train m value\n", - "Iteration 4: Largest change in params was 5.32e-05 in the m_probability of comp_num_clean, level `All other comparisons`\n", - "\n", - "EM converged after 4 iterations\n", - "m probability not trained for postcode - All other comparisons (comparison vector value: 0). This usually means the comparison level was never observed in the training data.\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - comp_num_clean (some u values are not trained).\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.estimate_m_from_label_column(\"comp_num_clean\")\n", - "m_by_name_and_postcode_area = \"\"\"\n", - " l.name_unusual_tokens = r.name_unusual_tokens\n", - " and l.postcode_area = r.postcode_area\n", - "\"\"\"\n", - "linker.estimate_parameters_using_expectation_maximisation(\n", - " m_by_name_and_postcode_area\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "045c4bc3-fd4e-4b86-ac4d-212c0f0c3a4c", - "metadata": {}, - "source": [ - "## Predict" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "e5712b44-6919-4420-9b93-14468f8e0662", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - " -- WARNING --\n", - "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. To produce predictions the following untrained trained parameters will use default values.\n", - "Comparison: 'comp_num_clean':\n", - " u values not fully trained\n" - ] - } - ], - "source": [ - "predictions = linker.predict(threshold_match_probability=0.7)" - ] - }, - { - "cell_type": "markdown", - "id": "453b4d9b-b283-47d8-a58b-03b68bb04a12", - "metadata": {}, - "source": [ - "## Cluster" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "0845d55e-7f69-4794-ae6c-2a68facc55e7", - "metadata": {}, - "outputs": [], - "source": [ - "predict_table = con.query(\"\"\"\n", - " select table_name\n", - " from information_schema.tables\n", - " where table_name like '%predict%';\n", - "\"\"\").fetchone()[0]\n", - "predictions = linker.register_table(predict_table, predict_table)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "738c3dec-a32c-4123-a1cf-6f50fb338f27", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Completed iteration 1, root rows count 11839\n", - "Completed iteration 2, root rows count 209\n", - "Completed iteration 3, root rows count 97\n", - "Completed iteration 4, root rows count 3\n", - "Completed iteration 5, root rows count 0\n" - ] - } - ], - "source": [ - "clusters = linker.cluster_pairwise_predictions_at_threshold(\n", - " predictions,\n", - " threshold_match_probability=0.7,\n", - " pairwise_formatting=True,\n", - " filter_pairwise_format_for_clusters=False,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "8a6ad9f1-2ac2-4068-86d7-4adc9a3c4797", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'__splink__df_representatives_7d70c1bd5'" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clusters.physical_name" - ] - }, - { - "cell_type": "markdown", - "id": "b794845b-808d-4c38-abae-5ed916341e84", - "metadata": {}, - "source": [ - "## Review" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "8db53b9f-a23a-4fb2-b4df-03460ca3ae02", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────┐\n", - "│ count_star() │\n", - "│ int64 │\n", - "├──────────────┤\n", - "│ 79559684 │\n", - "└──────────────┘" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "con.query(f\"select count(*) from {predict_table};\")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "08de38a7-d98b-4954-ace9-749ba2ead129", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────┐\n", - "│ count_star() │\n", - "│ int64 │\n", - "├──────────────┤\n", - "│ 79559684 │\n", - "└──────────────┘" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "con.query(f\"select count(*) from {clusters.physical_name};\")" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "8cc32ad2-4ef4-41ad-ba8a-7fd1996b3301", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────────┬───────────────┬────────────┬───┬─────────────┬──────────┬──────────────┬──────────────┐\n", - "│ database_name │ database_size │ block_size │ … │ free_blocks │ wal_size │ memory_usage │ memory_limit │\n", - "│ varchar │ varchar │ int64 │ │ int64 │ varchar │ varchar │ varchar │\n", - "├──────────────────┼───────────────┼────────────┼───┼─────────────┼──────────┼──────────────┼──────────────┤\n", - "│ company_matching │ 6.9GB │ 262144 │ … │ 1361 │ 0 bytes │ 3.0GB │ 26.5GB │\n", - "├──────────────────┴───────────────┴────────────┴───┴─────────────┴──────────┴──────────────┴──────────────┤\n", - "│ 1 rows 9 columns (7 shown) │\n", - "└──────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "con.query(\"pragma database_size;\")" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "39da8589-740e-4b1a-9378-126dd42fa789", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌────────────────────────────────────────────────┐\n", - "│ table_name │\n", - "│ varchar │\n", - "├────────────────────────────────────────────────┤\n", - "│ __splink__df_comparison_vectors_567a6e822 │\n", - "│ __splink__m_u_counts_8f910cdd0 │\n", - "│ __splink__df_concat_with_tf_5f189976e │\n", - "│ __splink__df_concat_484e1f2be │\n", - "│ __splink__df_representatives_7d70c1bd5 │\n", - "│ __splink__df_representatives_5_e86c6fd2a │\n", - "│ dit_export_wins__wins_dataset │\n", - "│ dit_data_hub__companies │\n", - "│ companieshouse_companies │\n", - "│ __splink__df_predict_2dbb7ef10 │\n", - "│ hmrc_trade__exporters │\n", - "│ __splink__df_connected_components_df_92bb91368 │\n", - "│ table_alias_lookup │\n", - "│ __splink__df_neighbours_8a7323701 │\n", - "│ unique_id_lookup │\n", - "├────────────────────────────────────────────────┤\n", - "│ 15 rows │\n", - "└────────────────────────────────────────────────┘" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "con.query(\"\"\"\n", - " select table_name\n", - " from information_schema.tables;\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "12259af6-6c4e-4197-86c1-f317950e5956", - "metadata": {}, - "source": [ - "## Export" - ] - }, - { - "cell_type": "markdown", - "id": "2aedf31e-bb6b-46ba-a2ed-68c39ac14f3b", - "metadata": {}, - "source": [ - "When prediction and cluster threshold are the same, source and target cluster are identical. We can drop one.\n", - "\n", - "If this ever changes, the below will break." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "99332ff3-5b26-42ae-abe4-8546fbb970ca", - "metadata": {}, - "outputs": [], - "source": [ - "con.query(f\"\"\"\n", - " copy (\n", - " select\n", - " src_tbl.table_name as source,\n", - " src_id.unique_id as source_id,\n", - " cl.source_cluster,\n", - " tgt_tbl.table_name as target,\n", - " tgt_id.unique_id as target_id,\n", - " cl.target_cluster,\n", - " cl.match_probability\n", - " from (\n", - " select\n", - " source_dataset_l as source,\n", - " unique_id_l as source_id,\n", - " cluster_id_l as source_cluster,\n", - " source_dataset_r as target,\n", - " unique_id_r as target_id,\n", - " cluster_id_r as target_cluster,\n", - " match_probability\n", - " from\n", - " { clusters.physical_name }\n", - " union\n", - " select\n", - " source_dataset_r as source,\n", - " unique_id_r as source_id,\n", - " cluster_id_r as source_cluster,\n", - " source_dataset_l as target,\n", - " unique_id_l as target_id,\n", - " cluster_id_l as target_cluster,\n", - " match_probability\n", - " from\n", - " { clusters.physical_name }\n", - " ) cl\n", - " join table_alias_lookup src_tbl on\n", - " (cl.source = src_tbl.id)\n", - " join unique_id_lookup src_id on\n", - " (cl.source_id = src_id.id)\n", - " join table_alias_lookup tgt_tbl on\n", - " (cl.target = tgt_tbl.id)\n", - " join unique_id_lookup tgt_id on\n", - " (cl.target_id = tgt_id.id)\n", - " )\n", - " to '{CLUSTER_PATH.as_posix()}'\n", - " (format parquet);\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "6ee93666-cdab-46e7-8b14-5b055c5e2596", - "metadata": {}, - "source": [ - "* 15 mins to write to Data Workspace\n", - " * About 5 to memory\n", - " * 10 to disk\n", - "* 28 mins to write two indexes" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "86d64340-3871-4081-b12d-5e0de37cc039", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "1294it [05:47, 3.73it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 1min 37s, sys: 16.3 s, total: 1min 54s\n", - "Wall time: 14min 20s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "to_write = ds.dataset(CLUSTER_PATH)\n", - "encoder = ArrowToPostgresBinaryEncoder(to_write.schema)\n", - "pg_schema = encoder.schema()\n", - "cols = [f'\"{col_name}\" {col.data_type.ddl()}' for col_name, col in pg_schema.columns]\n", - "ddl = f\"create temp table data ({','.join(cols)})\"\n", - "\n", - "with psycopg.connect(\"postgres://\") as conn:\n", - " with conn.cursor() as cur:\n", - " cur.execute(ddl) \n", - " with cur.copy(\"copy data from stdin with (format binary)\") as copy:\n", - " copy.write(encoder.write_header())\n", - " for batch in tqdm(to_write.to_batches()):\n", - " copy.write(encoder.write_batch(batch))\n", - " copy.write(encoder.finish())\n", - " cur.execute(\"drop table if exists \\\"_user_eaf4fd9a\\\".\\\"lookup\\\"\")\n", - " cur.execute(\"\"\"\n", - " create table \\\"_user_eaf4fd9a\\\".\\\"lookup\\\" as \n", - " select * from data\n", - " \"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "de763685-e345-4a7a-a85e-7cb4e7043fb6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 284 ms, sys: 124 ms, total: 408 ms\n", - "Wall time: 28min 18s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "with psycopg.connect(\"postgres://\") as conn:\n", - " with conn.cursor() as cur:\n", - " cur.execute(\"drop index if exists \\\"idx_wl_lookup_src_tgt\\\"\")\n", - " cur.execute(\"drop index if exists \\\"idx_wl_lookup_src_tgt_id\\\"\")\n", - " \n", - " cur.execute(\"create index \\\"idx_wl_lookup_src_tgt\\\" on \\\"_user_eaf4fd9a\\\".\\\"lookup\\\"(source, target)\")\n", - " cur.execute(\"create index \\\"idx_wl_lookup_src_tgt_id\\\" on \\\"_user_eaf4fd9a\\\".\\\"lookup\\\"(source_id, target_id)\")" - ] - }, - { - "cell_type": "markdown", - "id": "8b027eac-87f7-4db6-a2ad-5513bfe48c4c", - "metadata": {}, - "source": [ - "## Debug" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "21c6f3c7-82b6-4c80-90f4-3a9259a0b52e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────────────┬───────────┬────────────────┬───┬───────────┬────────────────┬───────────────────┐\n", - "│ source │ source_id │ source_cluster │ … │ target_id │ target_cluster │ match_probability │\n", - "│ varchar │ varchar │ varchar │ │ varchar │ varchar │ double │\n", - "├──────────────────────┼───────────┼────────────────┼───┼───────────┼────────────────┼───────────────────┤\n", - "│ hmrc_trade__export… │ 357429 │ 1-__-1009404 │ … │ 2909577 │ 1-__-1009404 │ 0.992428795516835 │\n", - "│ hmrc_trade__export… │ 3128298 │ 1-__-1009404 │ … │ 420973 │ 1-__-1009404 │ 0.992428795516835 │\n", - "│ hmrc_trade__export… │ 357429 │ 1-__-1009404 │ … │ 1702586 │ 1-__-1009404 │ 0.992428795516835 │\n", - "│ hmrc_trade__export… │ 3128298 │ 1-__-1009404 │ … │ 1494950 │ 1-__-1009404 │ 0.992428795516835 │\n", - "│ hmrc_trade__export… │ 3128298 │ 1-__-1009404 │ … │ 1993588 │ 1-__-1009404 │ 0.992428795516835 │\n", - "├──────────────────────┴───────────┴────────────────┴───┴───────────┴────────────────┴───────────────────┤\n", - "│ 5 rows 7 columns (6 shown) │\n", - "└────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "con.sql(f\"select * from '{CLUSTER_PATH.as_posix()}' limit 5;\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "4903fa78-66e9-4da9-8edf-de7ad612d1a5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────┐\n", - "│ count_star() │\n", - "│ int64 │\n", - "├──────────────┤\n", - "│ 0 │\n", - "└──────────────┘" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "con.sql(f\"select count(*) from '{CLUSTER_PATH.as_posix()}' where source_cluster != target_cluster;\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/models/splink/WL_splink-postgres.ipynb b/notebooks/models/splink/WL_splink-postgres.ipynb deleted file mode 100644 index ce84345..0000000 --- a/notebooks/models/splink/WL_splink-postgres.ipynb +++ /dev/null @@ -1,135 +0,0 @@ -{ - "metadata": { - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16-final" - }, - "orig_nbformat": 2, - "kernelspec": { - "name": "python_defaultSpec_1687881882472", - "display_name": "Python 3.9.16 64-bit ('company_matching': conda)" - } - }, - "nbformat": 4, - "nbformat_minor": 2, - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": "/opt/conda/envs/company_matching/lib/python3.9/site-packages/splink/postgres/comparison_template_library.py:9: DeprecationWarning: The 'warn' method is deprecated, use 'warning' instead\n logger.warn(\nThe Comparison Template Library is not currently implemented for Postgres due to limited string matching capability in `cll.comparison_level_library`\n" - } - ], - "source": [ - "from splink.postgres.linker import PostgresLinker\n", - "import splink.postgres.comparison_library as cl\n", - "import splink.postgres.comparison_template_library as ctl\n" - ] - }, - { - "source": [ - "In short -- key matching techniques (Jaro-Winkler) aren't implemented in PostgreSQL. This isn't a goer" - ], - "cell_type": "markdown", - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "output_type": "error", - "ename": "AttributeError", - "evalue": "module 'splink.postgres.comparison_library' has no attribute 'jaro_winkler_at_thresholds'", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 43\u001b[0m\n\u001b[1;32m 1\u001b[0m settings \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlink_type\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlink_and_dedupe\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mretain_matching_columns\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mretain_intermediate_calculation_columns\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mblocking_rules_to_generate_predictions\u001b[39m\u001b[38;5;124m\"\u001b[39m: [\n\u001b[1;32m 6\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124;03m ((l.comp_num_clean = r.comp_num_clean))\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;124;03m and (\u001b[39;00m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;124;03m l.comp_num_clean <> ''\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;124;03m and r.comp_num_clean <> ''\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;124;03m )\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m,\n\u001b[1;32m 13\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;124;03m (l.name_unusual_tokens = r.name_unusual_tokens)\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;124;03m and (\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;124;03m l.name_unusual_tokens <> ''\u001b[39;00m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;124;03m and r.name_unusual_tokens <> ''\u001b[39;00m\n\u001b[1;32m 18\u001b[0m \u001b[38;5;124;03m )\u001b[39;00m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m,\n\u001b[1;32m 20\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;124;03m (l.secondary_name_unusual_tokens = r.secondary_name_unusual_tokens)\u001b[39;00m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;124;03m and (\u001b[39;00m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;124;03m l.secondary_name_unusual_tokens <> ''\u001b[39;00m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;124;03m and r.secondary_name_unusual_tokens <> ''\u001b[39;00m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;124;03m )\u001b[39;00m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m,\n\u001b[1;32m 27\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 28\u001b[0m \u001b[38;5;124;03m (l.secondary_name_unusual_tokens = r.name_unusual_tokens)\u001b[39;00m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;124;03m and (\u001b[39;00m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;124;03m l.secondary_name_unusual_tokens <> ''\u001b[39;00m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;124;03m and r.name_unusual_tokens <> ''\u001b[39;00m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;124;03m )\u001b[39;00m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m,\n\u001b[1;32m 34\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;124;03m (r.secondary_name_unusual_tokens = l.name_unusual_tokens)\u001b[39;00m\n\u001b[1;32m 36\u001b[0m \u001b[38;5;124;03m and (\u001b[39;00m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;124;03m r.secondary_name_unusual_tokens <> ''\u001b[39;00m\n\u001b[1;32m 38\u001b[0m \u001b[38;5;124;03m and l.name_unusual_tokens <> ''\u001b[39;00m\n\u001b[1;32m 39\u001b[0m \u001b[38;5;124;03m )\u001b[39;00m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m,\n\u001b[1;32m 41\u001b[0m ],\n\u001b[1;32m 42\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcomparisons\u001b[39m\u001b[38;5;124m\"\u001b[39m: [\n\u001b[0;32m---> 43\u001b[0m \u001b[43mcl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjaro_winkler_at_thresholds\u001b[49m(\n\u001b[1;32m 44\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcomp_num_clean\u001b[39m\u001b[38;5;124m\"\u001b[39m, [\u001b[38;5;241m0.75\u001b[39m], term_frequency_adjustments\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 45\u001b[0m ),\n\u001b[1;32m 46\u001b[0m cl\u001b[38;5;241m.\u001b[39mjaro_winkler_at_thresholds(\n\u001b[1;32m 47\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname_unusual_tokens\u001b[39m\u001b[38;5;124m\"\u001b[39m, [\u001b[38;5;241m0.9\u001b[39m, \u001b[38;5;241m0.6\u001b[39m], term_frequency_adjustments\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 48\u001b[0m ),\n\u001b[1;32m 49\u001b[0m ctl\u001b[38;5;241m.\u001b[39mpostcode_comparison(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpostcode\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 50\u001b[0m ],\n\u001b[1;32m 51\u001b[0m }\n", - "\u001b[0;31mAttributeError\u001b[0m: module 'splink.postgres.comparison_library' has no attribute 'jaro_winkler_at_thresholds'" - ] - } - ], - "source": [ - "settings = {\n", - " \"link_type\": \"link_and_dedupe\",\n", - " \"retain_matching_columns\": False,\n", - " \"retain_intermediate_calculation_columns\": False,\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " \"\"\"\n", - " ((l.comp_num_clean = r.comp_num_clean))\n", - " and (\n", - " l.comp_num_clean <> ''\n", - " and r.comp_num_clean <> ''\n", - " )\n", - " \"\"\",\n", - " \"\"\"\n", - " (l.name_unusual_tokens = r.name_unusual_tokens)\n", - " and (\n", - " l.name_unusual_tokens <> ''\n", - " and r.name_unusual_tokens <> ''\n", - " )\n", - " \"\"\",\n", - " \"\"\"\n", - " (l.secondary_name_unusual_tokens = r.secondary_name_unusual_tokens)\n", - " and (\n", - " l.secondary_name_unusual_tokens <> ''\n", - " and r.secondary_name_unusual_tokens <> ''\n", - " )\n", - " \"\"\",\n", - " \"\"\"\n", - " (l.secondary_name_unusual_tokens = r.name_unusual_tokens)\n", - " and (\n", - " l.secondary_name_unusual_tokens <> ''\n", - " and r.name_unusual_tokens <> ''\n", - " )\n", - " \"\"\",\n", - " \"\"\"\n", - " (r.secondary_name_unusual_tokens = l.name_unusual_tokens)\n", - " and (\n", - " r.secondary_name_unusual_tokens <> ''\n", - " and l.name_unusual_tokens <> ''\n", - " )\n", - " \"\"\",\n", - " ],\n", - " \"comparisons\": [\n", - " cl.jaro_winkler_at_thresholds(\n", - " \"comp_num_clean\", [0.75], term_frequency_adjustments=True\n", - " ),\n", - " cl.jaro_winkler_at_thresholds(\n", - " \"name_unusual_tokens\", [0.9, 0.6], term_frequency_adjustments=True\n", - " ),\n", - " ctl.postcode_comparison(\"postcode\")\n", - " ],\n", - "}" - ] - } - ] -} \ No newline at end of file diff --git a/notebooks/models/splink/WL_splink-s3.ipynb b/notebooks/models/splink/WL_splink-s3.ipynb deleted file mode 100644 index e9e6ac4..0000000 --- a/notebooks/models/splink/WL_splink-s3.ipynb +++ /dev/null @@ -1,411 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "7869715c-d4ce-40b6-861f-6f811563bd26", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "efa7a6c4-c036-4ade-b379-20431e9aa4ee", - "metadata": {}, - "outputs": [], - "source": [ - "import duckdb\n", - "import boto3\n", - "import os\n", - "import pandas as pd\n", - "import requests\n", - "from pathlib import Path\n", - "\n", - "import cmf.locations as loc\n", - "\n", - "r = requests.get(\n", - " 'http://169.254.170.2' + \n", - " os.environ['AWS_CONTAINER_CREDENTIALS_RELATIVE_URI']\n", - ")\n", - "\n", - "AWS_CREDS = r.json()\n", - "HTTPFS_PATH = loc.PROJECT_DIR / 'scratch' / 'httpfs.duckdb_extension'" - ] - }, - { - "cell_type": "markdown", - "id": "1a767a56-79c5-4c73-a8c5-9b59e61a5e6a", - "metadata": {}, - "source": [ - "## Read from team S3: `boto`" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "be579556-8716-472c-9855-56af20022c88", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
foobar
0a1
1b2
2c3
\n", - "
" - ], - "text/plain": [ - " foo bar\n", - "0 a 1\n", - "1 b 2\n", - "2 c 3" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "client = boto3.client('s3', region_name=os.environ['S3_REGION']) \n", - "response = client.get_object(\n", - " Bucket='jupyter.notebook.uktrade.io', \n", - " Key=os.environ['S3_PREFIX_TEAM_DDAT_DATA_SCIENCE'] + '.tmp/dummy.csv'\n", - ") \n", - "df = pd.read_csv(response['Body'])\n", - "df" - ] - }, - { - "cell_type": "markdown", - "id": "80f5bc0f-195f-4014-9a10-fa396a1741ae", - "metadata": {}, - "source": [ - "## Read/write from team S3: `duckdb`" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "d6a33127-36c6-46e3-95f4-d3f2bfb24197", - "metadata": {}, - "outputs": [], - "source": [ - "con = duckdb.connect()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "47689981-6616-40f2-8cb9-b929764a2782", - "metadata": {}, - "outputs": [], - "source": [ - "# via https://duckdb.org/docs/extensions/httpfs.html\n", - "\n", - "con.query(f\"\"\"\n", - " install '{HTTPFS_PATH.resolve()}';\n", - " load '{HTTPFS_PATH.resolve()}';\n", - " set s3_region='{os.environ['S3_REGION']}';\n", - " set s3_access_key_id='{AWS_CREDS['AccessKeyId']}';\n", - " set s3_secret_access_key='{AWS_CREDS['SecretAccessKey']}';\n", - " set s3_session_token='{AWS_CREDS['Token']}';\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "b69cce58-7967-4894-b58f-99e6fc01a8dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌─────────┬───────┐\n", - "│ foo │ bar │\n", - "│ varchar │ int64 │\n", - "├─────────┼───────┤\n", - "│ a │ 1 │\n", - "│ b │ 2 │\n", - "│ c │ 3 │\n", - "└─────────┴───────┘" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "con.query(f\"\"\"\n", - " select *\n", - " from 's3://{\n", - " '/'.join([\n", - " os.environ['S3_BUCKET'],\n", - " os.environ['S3_PREFIX_TEAM_DDAT_DATA_SCIENCE'][:-1],\n", - " '.tmp',\n", - " 'dummy.csv'\n", - " ])\n", - " }';\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "267da13f-628a-4330-9c2f-f1ff18ac2a52", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame({'col1': ['alpha', 'beta'], 'col2': [3.14, 2.72]})" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "a9e45682-b03e-4af0-83a5-6223d4bc471d", - "metadata": {}, - "outputs": [], - "source": [ - "con.query(f\"\"\"\n", - " copy df\n", - " to 's3://{\n", - " '/'.join([\n", - " os.environ['S3_BUCKET'],\n", - " os.environ['S3_PREFIX_TEAM_DDAT_DATA_SCIENCE'][:-1],\n", - " '.tmp',\n", - " 'dummy_out.parquet'\n", - " ])\n", - " }'\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "396a3014-322e-4c6d-94dc-d2658a028e51", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌─────────┬────────┐\n", - "│ col1 │ col2 │\n", - "│ varchar │ double │\n", - "├─────────┼────────┤\n", - "│ alpha │ 3.14 │\n", - "│ beta │ 2.72 │\n", - "└─────────┴────────┘" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "con.query(f\"\"\"\n", - " select *\n", - " from 's3://{\n", - " '/'.join([\n", - " os.environ['S3_BUCKET'],\n", - " os.environ['S3_PREFIX_TEAM_DDAT_DATA_SCIENCE'][:-1],\n", - " '.tmp',\n", - " 'dummy_out.parquet'\n", - " ])\n", - " }';\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "56e46a79-2e1d-4722-8673-7c50601dfb49", - "metadata": {}, - "source": [ - "## Use team S3 as temporary `duckdb` storage" - ] - }, - { - "cell_type": "markdown", - "id": "c9710d43-159d-4415-a4a5-1b11eb3b4f8e", - "metadata": {}, - "source": [ - "Inconclusive, can't force it to use the S3 temp. Let's try it in production." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "3d6b5426-531c-4e2c-b89b-f3f413095f9e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌─────────────────────────────────────────────────────────────────────┐\n", - "│ current_setting('temp_directory') │\n", - "│ varchar │\n", - "├─────────────────────────────────────────────────────────────────────┤\n", - "│ s3://jupyter.notebook.uktrade.io/teams/_team_ddat_data_science/.tmp │\n", - "└─────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "con.query(f\"\"\"\n", - " set temp_directory='s3://{\n", - " '/'.join([\n", - " os.environ['S3_BUCKET'],\n", - " os.environ['S3_PREFIX_TEAM_DDAT_DATA_SCIENCE'][:-1],\n", - " '.tmp'\n", - " ])\n", - " }';\n", - " select current_setting('temp_directory');\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "36a8270d-28f4-4c5a-a5fe-7e5e05641a2f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌─────────────────────────────────┐\n", - "│ current_setting('memory_limit') │\n", - "│ varchar │\n", - "├─────────────────────────────────┤\n", - "│ 26.4GB │\n", - "└─────────────────────────────────┘" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# set memory_limit='0.01GB';\n", - "# reset memory_limit;\n", - "con.query(\"\"\"\n", - " select current_setting('memory_limit'); \n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "bd88e40c-0aed-4445-bb1c-e03d8cefaa4c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────────────┬───────────┬────────────────┬───┬────────────┬─────────────────┬────────────────┐\n", - "│ postcode_area │ unique_id │ comp_num_clean │ … │ name_sig │ name_sig_first5 │ name_sig_last5 │\n", - "│ varchar │ varchar │ double │ │ varchar │ varchar │ varchar │\n", - "├───────────────┼───────────┼────────────────┼───┼────────────┼─────────────────┼────────────────┤\n", - "│ NE │ 1 │ NULL │ … │ clzbldjmmg │ clzbl │ djmmg │\n", - "│ SG │ 2 │ NULL │ … │ cluuck │ cluuc │ luuck │\n", - "│ GU │ 3 │ NULL │ … │ mdclg │ mdclg │ mdclg │\n", - "│ SE │ 4 │ NULL │ … │ cplddf │ cpldd │ plddf │\n", - "│ NP │ 5 │ NULL │ … │ fvpduc │ fvpdu │ vpduc │\n", - "├───────────────┴───────────┴────────────────┴───┴────────────┴─────────────────┴────────────────┤\n", - "│ 5 rows 13 columns (6 shown) │\n", - "└────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "con.query(f\"\"\"\n", - " select\n", - " *\n", - " from\n", - " '{\n", - " '/'.join([\n", - " loc.DATA_SUBDIR['processed'],\n", - " 'company-matching__full',\n", - " 'hmrc_trade__exporters.parquet'\n", - " ])\n", - " }'\n", - " limit 5;\n", - "\"\"\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "company_matching", - "language": "python", - "name": "company_matching" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/models/splink/WL_splink-tests-2.ipynb b/notebooks/models/splink/WL_splink-tests-2.ipynb deleted file mode 100644 index b442bae..0000000 --- a/notebooks/models/splink/WL_splink-tests-2.ipynb +++ /dev/null @@ -1,3657 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "fe85d7d5-8de2-483c-a004-0a7703a88138", - "metadata": {}, - "source": [ - "# Splink tests 2\n", - "\n", - "Somewhere clean to tighten up the pipeline as I get to a quicker iteration." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "8f79bd43-bfbf-4f55-b929-48c49c25a212", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "from IPython.display import IFrame\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "00297a5f-93ec-40b4-8fab-8266aa9cbb62", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "RendererRegistry.enable('mimetype')" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import duckdb\n", - "import pandas as pd\n", - "import polars as pl\n", - "import random\n", - "import datetime\n", - "import os\n", - "\n", - "import altair as alt\n", - "alt.renderers.enable(\"mimetype\")\n", - "\n", - "from cmf.models import model_train as ld\n", - "from cmf.features.clean_complex import clean_comp_names\n", - "from cmf.config import stopwords\n", - "from cmf.config import settings\n", - "from cmf.features.clean_basic import (\n", - " remove_notnumbers_leadingzeroes,\n", - " clean_company_name,\n", - " array_except,\n", - " array_intersect,\n", - " list_join_to_string,\n", - ")\n", - "from cmf import locations as loc\n", - "\n", - "from splink.duckdb.linker import DuckDBLinker\n", - "from splink.charts import save_offline_chart" - ] - }, - { - "cell_type": "markdown", - "id": "4be42707-e6e5-4ea6-81f5-d2eebf8849d7", - "metadata": {}, - "source": [ - "# TODO\n", - "\n", - "21/6. I've got a strategy. I have example queries that use a generated lookup to join n number of target tables to a source, both permitting and not permitting duplication in the target. I believe that link_and_dedupe will create the clusters I need to successfully sort this out.\n", - "\n", - "The blocking rules are hugely improved but estimating m is taking weirdly long. I think replacing '' with nulls will sort this out.\n", - "\n", - "* Change '' to nulls in data selection -- suspect this is what's slowing down m estimation in the company_number column (tonnes of false dupes)\n", - "* Figure out why dupes in EW aren't in one cluster\n", - " * Changed linker to link_and_dedupe -- needs testing\n", - " * If this works, ready to productionise" - ] - }, - { - "cell_type": "markdown", - "id": "f51b1677-db58-4b50-87ea-08625b05efe6", - "metadata": {}, - "source": [ - "## Data" - ] - }, - { - "cell_type": "markdown", - "id": "5c5b098d-8405-4eb4-a35c-1fd0ddf55a05", - "metadata": {}, - "source": [ - "Data we need to bring in:\n", - "\n", - "* ✅ Companies house\n", - "* ✅ Data Hub companies\n", - "* ✅ HMRC exporters\n", - "* ✅ Export wins" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "5d462348-92af-48c8-ad08-76afdddfa652", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/envs/lead_generation_experiments/lib/python3.9/site-packages/pandas/io/sql.py:1410: RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to \"sqlalchemy<2.0\". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n", - " meta = MetaData(self.connectable, schema=schema)\n" - ] - } - ], - "source": [ - "df_ch = ld.comp_house_read(100_000)\n", - "df_ch_clean = ld.clean_numbers_and_names(df_ch)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "6fbde7ee-8f02-46b8-b31a-4b4bef496e61", - "metadata": {}, - "outputs": [], - "source": [ - "df_dh = ld.data_hub_read(100_000)\n", - "df_dh_clean = ld.clean_numbers_and_names(df_dh)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "275226f9-aa29-4f96-98ee-6d3955fa9197", - "metadata": {}, - "outputs": [], - "source": [ - "df_ex = ld.hmrc_exporters_read(100_000)\n", - "df_ex_clean = ld.clean_numbers_and_names(df_ex)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "f1981605-31f0-40d0-8a08-eb33313b304e", - "metadata": {}, - "outputs": [], - "source": [ - "df_ew = ld.export_wins_read(100_000)\n", - "df_ew_clean = ld.clean_numbers_and_names(df_ew)" - ] - }, - { - "cell_type": "markdown", - "id": "f694a908-a612-429d-9c99-5bb7e089ce15", - "metadata": {}, - "source": [ - "## Link and predict" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "76c3130d-ff5e-4c7e-9c91-421d88c59dc3", - "metadata": {}, - "outputs": [], - "source": [ - "linker = DuckDBLinker(\n", - " [\n", - " df_dh_clean, \n", - " df_ch_clean, \n", - " df_ex_clean, \n", - " df_ew_clean\n", - " ],\n", - " settings,\n", - " input_table_aliases=[\n", - " \"dit_data_hub__companies\", \n", - " \"companieshouse_companies\", \n", - " \"hmrc_trade__exporters\", \n", - " \"dit_export_wins__wins_dataset\"\n", - " ],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "7a824346-6248-462f-8d54-aa53ea2853ab", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Probability two random records match is estimated to be 8.85e-06.\n", - "This means that amongst all possible pairwise record comparisons, one in 112,941.61 are expected to match. With 62,627,736,655 total possible comparisons, we expect a total of around 554,514.29 matching pairs\n" - ] - } - ], - "source": [ - "linker.estimate_probability_two_random_records_match(\n", - " \"l.name_unusual_tokens = r.name_unusual_tokens\",\n", - " recall=0.7,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "457bc209-36fc-4124-8d0b-013de18b2934", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "----- Estimating u probabilities using random sampling -----\n", - "\n", - "Estimated u probabilities using random sampling\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - comp_num_clean (no m values are trained).\n", - " - name_unusual_tokens (no m values are trained).\n", - " - postcode (no m values are trained).\n" - ] - } - ], - "source": [ - "linker.estimate_u_using_random_sampling(max_pairs=1e7)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "85618fb1-9185-42be-8faf-29d7529f56b0", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "---- Estimating m probabilities using from column comp_num_clean -----\n", - "m probability not trained for comp_num_clean - Jaro_winkler_similarity >= 0.75 (comparison vector value: 1). This usually means the comparison level was never observed in the training data.\n", - "m probability not trained for comp_num_clean - All other comparisons (comparison vector value: 0). This usually means the comparison level was never observed in the training data.\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - comp_num_clean (some m values are not trained).\n" - ] - } - ], - "source": [ - "linker.estimate_m_from_label_column(\"comp_num_clean\")" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "857cd4fb-9e5c-4b36-af4f-32a1030e5682", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "----- Starting EM training session -----\n", - "\n", - "Estimating the m probabilities of the model by blocking on:\n", - "l.name_unusual_tokens = r.name_unusual_tokens and l.postcode_area = r.postcode_area\n", - "\n", - "Parameter estimates will be made for the following comparison(s):\n", - " - comp_num_clean\n", - " - postcode\n", - "\n", - "Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: \n", - " - name_unusual_tokens\n", - "\n", - "\n", - "WARNING:\n", - "Level All other comparisons on comparison postcode not observed in dataset, unable to train m value\n", - "Iteration 1: Largest change in params was -0.754 in the m_probability of postcode, level `All other comparisons`\n", - "\n", - "WARNING:\n", - "Level All other comparisons on comparison postcode not observed in dataset, unable to train m value\n", - "Iteration 2: Largest change in params was 0.0448 in probability_two_random_records_match\n", - "\n", - "WARNING:\n", - "Level All other comparisons on comparison postcode not observed in dataset, unable to train m value\n", - "Iteration 3: Largest change in params was -0.00363 in the m_probability of comp_num_clean, level `Exact match`\n", - "\n", - "WARNING:\n", - "Level All other comparisons on comparison postcode not observed in dataset, unable to train m value\n", - "Iteration 4: Largest change in params was -0.000101 in the m_probability of comp_num_clean, level `Exact match`\n", - "\n", - "WARNING:\n", - "Level All other comparisons on comparison postcode not observed in dataset, unable to train m value\n", - "Iteration 5: Largest change in params was -2.37e-06 in the m_probability of comp_num_clean, level `Exact match`\n", - "\n", - "EM converged after 5 iterations\n", - "m probability not trained for postcode - All other comparisons (comparison vector value: 0). This usually means the comparison level was never observed in the training data.\n", - "\n", - "Your model is fully trained. All comparisons have at least one estimate for their m and u values\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m_by_name_and_postcode_area = \"l.name_unusual_tokens = r.name_unusual_tokens and l.postcode_area = r.postcode_area\"\n", - "linker.estimate_parameters_using_expectation_maximisation(m_by_name_and_postcode_area)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "ff6dbbdd-d48e-44a4-a398-88ea695120e6", - "metadata": {}, - "outputs": [], - "source": [ - "predictions = linker.predict(threshold_match_probability=0.7)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "b961953b-aff9-4b08-b697-3741582d9968", - "metadata": {}, - "outputs": [], - "source": [ - "df_predict = predictions.as_pandas_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "id": "9835c059-7acd-40dd-a60e-7c75b574103f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dit_export_wins__wins_dataset 145293\n", - "hmrc_trade__exporters 66647\n", - "dit_data_hub__companies 26615\n", - "companieshouse_companies 2504\n", - "Name: source_dataset_l, dtype: int64" - ] - }, - "execution_count": 93, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "dit_export_wins__wins_dataset 154230\n", - "hmrc_trade__exporters 85181\n", - "dit_data_hub__companies 1616\n", - "companieshouse_companies 32\n", - "Name: source_dataset_r, dtype: int64" - ] - }, - "execution_count": 93, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_predict.source_dataset_l.value_counts()\n", - "df_predict.source_dataset_r.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "id": "3a8d14b1-9617-4297-b318-a6dd943ee51b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dit_export_wins__wins_dataset 256527\n", - "dit_data_hub__companies 76777\n", - "hmrc_trade__exporters 73356\n", - "companieshouse_companies 5508\n", - "Name: source_dataset_l, dtype: int64" - ] - }, - "execution_count": 94, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "dit_export_wins__wins_dataset 257237\n", - "hmrc_trade__exporters 143475\n", - "dit_data_hub__companies 11239\n", - "companieshouse_companies 217\n", - "Name: source_dataset_r, dtype: int64" - ] - }, - "execution_count": 94, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "predictions_2 = linker.predict()\n", - "predictions_2 = predictions_2.as_pandas_dataframe()\n", - "predictions_2.source_dataset_l.value_counts()\n", - "predictions_2.source_dataset_r.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "id": "4c671934-32dd-4b6c-a727-af977dd86b50", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 27)
match_weightmatch_probabilitysource_dataset_lsource_dataset_runique_id_lunique_id_rcomp_num_clean_lcomp_num_clean_rgamma_comp_num_cleantf_comp_num_clean_ltf_comp_num_clean_rbf_comp_num_cleanbf_tf_adj_comp_num_cleanname_unusual_tokens_lname_unusual_tokens_rgamma_name_unusual_tokenstf_name_unusual_tokens_ltf_name_unusual_tokens_rbf_name_unusual_tokensbf_tf_adj_name_unusual_tokenspostcode_lpostcode_rgamma_postcodebf_postcodesecondary_name_unusual_tokens_lsecondary_name_unusual_tokens_rmatch_key
f64f64strstrstrstrstrstri32f64f64f64f64strstri32f64f64f64f64strstri32f64strstrstr
8.4505920.99715"companieshouse…"hmrc_trade__ex…"06274585""1418546""6274585"null-10.000005null1.01.0"magawell""magawell"30.0000060.00000667666.279481.134151"NP11 5GT""NP11 5GT"4520.071434""null"1"
8.4505920.99715"companieshouse…"hmrc_trade__ex…"05401138""2611486""5401138"null-10.000005null1.01.0"springcoil""springcoil"30.0000060.00000667666.279481.134151"S9 3NE""S9 3NE"4520.071434""null"1"
8.4505920.99715"companieshouse…"hmrc_trade__ex…"03413004""2871186""3413004"null-10.000005null1.01.0"europe stoneag…"europe stoneag…30.0000060.00000667666.279481.134151"WR5 2DQ""WR5 2DQ"4520.071434"aquapower salo…null"1"
8.4505920.99715"companieshouse…"hmrc_trade__ex…"05434133""1269999""5434133"null-10.000005null1.01.0"dellner""dellner"30.0000060.00000667666.279481.134151"DE11 9DX""DE11 9DX"4520.071434"couplers delln…null"1"
8.4505920.99715"companieshouse…"hmrc_trade__ex…"12208468""2654556""12208468"null-10.000005null1.01.0"cocompany ligh…"cocompany ligh…30.0000060.00000667666.279481.134151"CM7 3QS""CM7 3QS"4520.071434""null"1"
" - ], - "text/plain": [ - "shape: (5, 27)\n", - "┌────────────┬────────────┬────────────┬────────────┬───┬───────────┬────────────┬────────────┬─────────┐\n", - "│ match_weig ┆ match_prob ┆ source_dat ┆ source_dat ┆ … ┆ bf_postco ┆ secondary_ ┆ secondary_ ┆ match_k │\n", - "│ ht ┆ ability ┆ aset_l ┆ aset_r ┆ ┆ de ┆ name_unusu ┆ name_unusu ┆ ey │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ al_tokens_ ┆ al_tokens_ ┆ --- │\n", - "│ f64 ┆ f64 ┆ str ┆ str ┆ ┆ f64 ┆ l ┆ r ┆ str │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ --- ┆ --- ┆ │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ str ┆ str ┆ │\n", - "╞════════════╪════════════╪════════════╪════════════╪═══╪═══════════╪════════════╪════════════╪═════════╡\n", - "│ 8.450592 ┆ 0.99715 ┆ companiesh ┆ hmrc_trade ┆ … ┆ 520.07143 ┆ ┆ null ┆ 1 │\n", - "│ ┆ ┆ ouse_compa ┆ __exporter ┆ ┆ 4 ┆ ┆ ┆ │\n", - "│ ┆ ┆ nies ┆ s ┆ ┆ ┆ ┆ ┆ │\n", - "│ 8.450592 ┆ 0.99715 ┆ companiesh ┆ hmrc_trade ┆ … ┆ 520.07143 ┆ ┆ null ┆ 1 │\n", - "│ ┆ ┆ ouse_compa ┆ __exporter ┆ ┆ 4 ┆ ┆ ┆ │\n", - "│ ┆ ┆ nies ┆ s ┆ ┆ ┆ ┆ ┆ │\n", - "│ 8.450592 ┆ 0.99715 ┆ companiesh ┆ hmrc_trade ┆ … ┆ 520.07143 ┆ aquapower ┆ null ┆ 1 │\n", - "│ ┆ ┆ ouse_compa ┆ __exporter ┆ ┆ 4 ┆ salotech ┆ ┆ │\n", - "│ ┆ ┆ nies ┆ s ┆ ┆ ┆ ┆ ┆ │\n", - "│ 8.450592 ┆ 0.99715 ┆ companiesh ┆ hmrc_trade ┆ … ┆ 520.07143 ┆ couplers ┆ null ┆ 1 │\n", - "│ ┆ ┆ ouse_compa ┆ __exporter ┆ ┆ 4 ┆ dellner ┆ ┆ │\n", - "│ ┆ ┆ nies ┆ s ┆ ┆ ┆ ┆ ┆ │\n", - "│ 8.450592 ┆ 0.99715 ┆ companiesh ┆ hmrc_trade ┆ … ┆ 520.07143 ┆ ┆ null ┆ 1 │\n", - "│ ┆ ┆ ouse_compa ┆ __exporter ┆ ┆ 4 ┆ ┆ ┆ │\n", - "│ ┆ ┆ nies ┆ s ┆ ┆ ┆ ┆ ┆ │\n", - "└────────────┴────────────┴────────────┴────────────┴───┴───────────┴────────────┴────────────┴─────────┘" - ] - }, - "execution_count": 95, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(\n", - " pl.from_pandas(predictions_2)\n", - " .filter(pl.col('source_dataset_r') == 'hmrc_trade__exporters')\n", - " .sort(by = 'match_probability', descending = True)\n", - " .head(5)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "66a134f4-e789-482f-8b2b-8176884d3332", - "metadata": {}, - "source": [ - "## Cluster experiment 2" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "3f1a5ac4-d78e-416a-833d-718412f5e5f6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Completed iteration 1, root rows count 1\n", - "Completed iteration 2, root rows count 0\n" - ] - } - ], - "source": [ - "clusters = linker.cluster_pairwise_predictions_at_threshold(\n", - " predictions,\n", - " threshold_match_probability = 0.7\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 134, - "id": "a8987b59-a156-499d-907c-882eec1bf9f7", - "metadata": {}, - "outputs": [], - "source": [ - "linker.cluster_studio_dashboard(\n", - " predictions, \n", - " clusters, \n", - " \"cluster_studio.html\", \n", - " sampling_method=\"by_cluster_size\", \n", - " overwrite=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 135, - "id": "7b238f0f-8368-44ff-bd22-db371c4538ee", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 135, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "IFrame(\n", - " src=\"./cluster_studio.html\", \n", - " width=\"100%\", \n", - " height=1000\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "dae6372e-68c4-4ea9-a07a-a28734ab2c55", - "metadata": {}, - "source": [ - "## Cluster experiment 1" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "c83e946e-57e6-4564-87b1-87187391bf3c", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Completed iteration 1, root rows count 26\n", - "Completed iteration 2, root rows count 0\n" - ] - } - ], - "source": [ - "clusters = linker.cluster_pairwise_predictions_at_threshold(\n", - " predictions,\n", - " threshold_match_probability=0.7,\n", - " pairwise_formatting=True,\n", - " filter_pairwise_format_for_clusters=False,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "0594025a-cb24-48ea-98c2-f1bd2f977cca", - "metadata": {}, - "outputs": [], - "source": [ - "df_clusters = clusters.as_pandas_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "5617a380-ed0f-4bbf-beb2-e50a93b4c807", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
match_weightmatch_probabilitysource_dataset_lsource_dataset_runique_id_lunique_id_rcomp_num_clean_lcomp_num_clean_rgamma_comp_num_cleantf_comp_num_clean_ltf_comp_num_clean_rbf_comp_num_cleanbf_tf_adj_comp_num_cleanname_unusual_tokens_lname_unusual_tokens_rgamma_name_unusual_tokenstf_name_unusual_tokens_ltf_name_unusual_tokens_rbf_name_unusual_tokensbf_tf_adj_name_unusual_tokenspostcode_area_lpostcode_area_rgamma_postcode_areabf_postcode_areasecondary_name_unusual_tokens_lsecondary_name_unusual_tokens_rmatch_keycluster_id_lcluster_id_r
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [match_weight, match_probability, source_dataset_l, source_dataset_r, unique_id_l, unique_id_r, comp_num_clean_l, comp_num_clean_r, gamma_comp_num_clean, tf_comp_num_clean_l, tf_comp_num_clean_r, bf_comp_num_clean, bf_tf_adj_comp_num_clean, name_unusual_tokens_l, name_unusual_tokens_r, gamma_name_unusual_tokens, tf_name_unusual_tokens_l, tf_name_unusual_tokens_r, bf_name_unusual_tokens, bf_tf_adj_name_unusual_tokens, postcode_area_l, postcode_area_r, gamma_postcode_area, bf_postcode_area, secondary_name_unusual_tokens_l, secondary_name_unusual_tokens_r, match_key, cluster_id_l, cluster_id_r]\n", - "Index: []" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_clusters[df_clusters.cluster_id_l != df_clusters.cluster_id_r].head(5)" - ] - }, - { - "cell_type": "markdown", - "id": "e5f9cae8-5d49-4904-aef0-258c6dd9a0b7", - "metadata": {}, - "source": [ - "I told Leo there was a problem when these don't match. I think they only don't match when the _prediction_ threshold and _clustering_ threshold don't match. When clustering is higher than prediction, you end up with dangling, clusterless matches in the pairwise dataframe.\n", - "\n", - "Consider the following where prediction threshold was 0.5:\n", - "\n", - "```\n", - "A -> B (0.5)\n", - "B -> C (0.7)\n", - "```\n", - "\n", - "With clustering threshold 0.7:\n", - "\n", - "```\n", - "A, 1\n", - "B, 2\n", - "C, 2\n", - "```\n", - "\n", - "And in the pairwise dataframe:\n", - "\n", - "```\n", - "A (cluster 1) -> B (cluster 2) (0.5)\n", - "B (cluster 2) -> C (cluster 2) (0.7)\n", - "```\n", - "\n", - "But if they both match, where the prediction and clustering thresholds are both 0.5:\n", - "\n", - "```\n", - "A (cluster 1) -> B (cluster 1) (0.5)\n", - "B (cluster 1) -> C (cluster 1) (0.7)\n", - "```\n", - "\n", - "Indeed, we can confirm by forcing the imbalance and checking that every combination of cluster l/r is unique in the clustering mismatches in the pairwise dataframe. They are." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "cb96c6d1-2a7a-4756-a39a-e48f9605e0a4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (0, 3)
cluster_id_lcluster_id_rcount
strstru32
" - ], - "text/plain": [ - "shape: (0, 3)\n", - "┌──────────────┬──────────────┬───────┐\n", - "│ cluster_id_l ┆ cluster_id_r ┆ count │\n", - "│ --- ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ u32 │\n", - "╞══════════════╪══════════════╪═══════╡\n", - "└──────────────┴──────────────┴───────┘" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(\n", - " pl.from_pandas(df_clusters[df_clusters.cluster_id_l != df_clusters.cluster_id_r])\n", - " .groupby(['cluster_id_l', 'cluster_id_r'])\n", - " .count()\n", - " .filter(pl.col('count') > 1)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "id": "bc3ea12a-2dc5-4405-96e9-245bb5b69bfa", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dit_export_wins__wins_dataset 146817\n", - "dit_data_hub__companies 8967\n", - "companieshouse_companies 1289\n", - "Name: source_dataset_l, dtype: int64" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "dit_export_wins__wins_dataset 155957\n", - "dit_data_hub__companies 1116\n", - "Name: source_dataset_r, dtype: int64" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_clusters.source_dataset_l.value_counts()\n", - "df_clusters.source_dataset_r.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "de134e7d-8540-4714-b00d-43e1e0e2b181", - "metadata": {}, - "outputs": [], - "source": [ - "lookup = duckdb.sql(\"\"\"\n", - " select\n", - " source_dataset_l as source,\n", - " unique_id_l as source_id,\n", - " cluster_id_l as source_cluster,\n", - " source_dataset_r as target,\n", - " unique_id_r as target_id,\n", - " cluster_id_r as target_cluster,\n", - " match_probability\n", - " from\n", - " df_clusters\n", - " union\n", - " select\n", - " source_dataset_r as source,\n", - " unique_id_r as source_id,\n", - " cluster_id_r as source_cluster,\n", - " source_dataset_l as target,\n", - " unique_id_l as target_id,\n", - " cluster_id_l as target_cluster,\n", - " match_probability\n", - " from\n", - " df_clusters\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "id": "782e3334-558d-43f3-a314-21d60dd22690", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dit_export_wins__wins_dataset 299523\n", - "hmrc_trade__exporters 151828\n", - "dit_data_hub__companies 28231\n", - "companieshouse_companies 2536\n", - "Name: source, dtype: int64" - ] - }, - "execution_count": 99, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "lookup.df().source.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "90b56b9d-8416-4206-bc82-3d632f3fe570", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sourcesource_idsource_clustertargettarget_idtarget_clustermatch_probability
0dit_export_wins__wins_dataset1dbec4d3-da65-4642-b343-4e611f41982ddit_export_wins__wins_dataset-__-1dbec4d3-da65...dit_export_wins__wins_dataseta9a73745-1696-48bd-beb7-522a2cd325c6dit_export_wins__wins_dataset-__-1dbec4d3-da65...0.999913
1dit_export_wins__wins_dataset2e724392-abcf-437e-a8c5-fec6f2a72fe2dit_export_wins__wins_dataset-__-1dc53fda-805a...dit_export_wins__wins_datasetb9b3badd-68a9-48f8-b800-50ac146bf91cdit_export_wins__wins_dataset-__-1dc53fda-805a...0.999935
2dit_export_wins__wins_dataset1df38b15-ba6e-4378-9ded-753cc27dc87ddit_export_wins__wins_dataset-__-1df38b15-ba6e...dit_export_wins__wins_dataset3485848d-8ffd-4ef1-a4ee-479d47bc448bdit_export_wins__wins_dataset-__-1df38b15-ba6e...0.999674
3dit_export_wins__wins_dataset1df38b15-ba6e-4378-9ded-753cc27dc87ddit_export_wins__wins_dataset-__-1df38b15-ba6e...dit_export_wins__wins_dataset764b6f8a-4ac8-4b5f-a8f3-3164ff41d4b9dit_export_wins__wins_dataset-__-1df38b15-ba6e...0.999674
4dit_export_wins__wins_dataset764b6f8a-4ac8-4b5f-a8f3-3164ff41d4b9dit_export_wins__wins_dataset-__-1df38b15-ba6e...dit_export_wins__wins_datasete6b6f554-f5ff-4658-8079-0134e09e00a0dit_export_wins__wins_dataset-__-1df38b15-ba6e...0.999674
........................
313537dit_export_wins__wins_datasetecd3306e-1909-461f-b994-c028da621ca9dit_export_wins__wins_dataset-__-0e68c25c-0230...dit_export_wins__wins_dataset409b43e7-14d6-488e-9bb9-de81c7aa3704dit_export_wins__wins_dataset-__-0e68c25c-0230...0.999823
313538dit_export_wins__wins_datasetd3bbbb75-34eb-43c0-bfbf-d772b64d1a72dit_export_wins__wins_dataset-__-0e6f01a3-98be...dit_export_wins__wins_dataset8c1940f2-6c0c-4aee-97cc-1ee67937c0d3dit_export_wins__wins_dataset-__-0e6f01a3-98be...0.999772
313539dit_export_wins__wins_datasetcb3a076c-bc51-4950-a31a-0e47029ab1dedit_export_wins__wins_dataset-__-0e714516-011d...dit_export_wins__wins_dataset47d90de2-7ee4-45b9-a3c4-dd92e3ab7132dit_export_wins__wins_dataset-__-0e714516-011d...0.999436
313540dit_export_wins__wins_dataseta5699268-a2e9-4d8b-9b15-0956dc37f36cdit_export_wins__wins_dataset-__-0e714516-011d...dit_export_wins__wins_dataset6599a87b-2e42-4e99-bcf1-cbcc7695dd77dit_export_wins__wins_dataset-__-0e714516-011d...0.999436
313541dit_export_wins__wins_datasetf7a63b54-a909-40e4-a8b8-1ff8784c9accdit_export_wins__wins_dataset-__-0e788c34-27d6...dit_export_wins__wins_datasetbe8a696b-892a-40fe-b91c-eb163bb5c913dit_export_wins__wins_dataset-__-0e788c34-27d6...0.999768
\n", - "

313542 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " source source_id \\\n", - "0 dit_export_wins__wins_dataset 1dbec4d3-da65-4642-b343-4e611f41982d \n", - "1 dit_export_wins__wins_dataset 2e724392-abcf-437e-a8c5-fec6f2a72fe2 \n", - "2 dit_export_wins__wins_dataset 1df38b15-ba6e-4378-9ded-753cc27dc87d \n", - "3 dit_export_wins__wins_dataset 1df38b15-ba6e-4378-9ded-753cc27dc87d \n", - "4 dit_export_wins__wins_dataset 764b6f8a-4ac8-4b5f-a8f3-3164ff41d4b9 \n", - "... ... ... \n", - "313537 dit_export_wins__wins_dataset ecd3306e-1909-461f-b994-c028da621ca9 \n", - "313538 dit_export_wins__wins_dataset d3bbbb75-34eb-43c0-bfbf-d772b64d1a72 \n", - "313539 dit_export_wins__wins_dataset cb3a076c-bc51-4950-a31a-0e47029ab1de \n", - "313540 dit_export_wins__wins_dataset a5699268-a2e9-4d8b-9b15-0956dc37f36c \n", - "313541 dit_export_wins__wins_dataset f7a63b54-a909-40e4-a8b8-1ff8784c9acc \n", - "\n", - " source_cluster \\\n", - "0 dit_export_wins__wins_dataset-__-1dbec4d3-da65... \n", - "1 dit_export_wins__wins_dataset-__-1dc53fda-805a... \n", - "2 dit_export_wins__wins_dataset-__-1df38b15-ba6e... \n", - "3 dit_export_wins__wins_dataset-__-1df38b15-ba6e... \n", - "4 dit_export_wins__wins_dataset-__-1df38b15-ba6e... \n", - "... ... \n", - "313537 dit_export_wins__wins_dataset-__-0e68c25c-0230... \n", - "313538 dit_export_wins__wins_dataset-__-0e6f01a3-98be... \n", - "313539 dit_export_wins__wins_dataset-__-0e714516-011d... \n", - "313540 dit_export_wins__wins_dataset-__-0e714516-011d... \n", - "313541 dit_export_wins__wins_dataset-__-0e788c34-27d6... \n", - "\n", - " target target_id \\\n", - "0 dit_export_wins__wins_dataset a9a73745-1696-48bd-beb7-522a2cd325c6 \n", - "1 dit_export_wins__wins_dataset b9b3badd-68a9-48f8-b800-50ac146bf91c \n", - "2 dit_export_wins__wins_dataset 3485848d-8ffd-4ef1-a4ee-479d47bc448b \n", - "3 dit_export_wins__wins_dataset 764b6f8a-4ac8-4b5f-a8f3-3164ff41d4b9 \n", - "4 dit_export_wins__wins_dataset e6b6f554-f5ff-4658-8079-0134e09e00a0 \n", - "... ... ... \n", - "313537 dit_export_wins__wins_dataset 409b43e7-14d6-488e-9bb9-de81c7aa3704 \n", - "313538 dit_export_wins__wins_dataset 8c1940f2-6c0c-4aee-97cc-1ee67937c0d3 \n", - "313539 dit_export_wins__wins_dataset 47d90de2-7ee4-45b9-a3c4-dd92e3ab7132 \n", - "313540 dit_export_wins__wins_dataset 6599a87b-2e42-4e99-bcf1-cbcc7695dd77 \n", - "313541 dit_export_wins__wins_dataset be8a696b-892a-40fe-b91c-eb163bb5c913 \n", - "\n", - " target_cluster match_probability \n", - "0 dit_export_wins__wins_dataset-__-1dbec4d3-da65... 0.999913 \n", - "1 dit_export_wins__wins_dataset-__-1dc53fda-805a... 0.999935 \n", - "2 dit_export_wins__wins_dataset-__-1df38b15-ba6e... 0.999674 \n", - "3 dit_export_wins__wins_dataset-__-1df38b15-ba6e... 0.999674 \n", - "4 dit_export_wins__wins_dataset-__-1df38b15-ba6e... 0.999674 \n", - "... ... ... \n", - "313537 dit_export_wins__wins_dataset-__-0e68c25c-0230... 0.999823 \n", - "313538 dit_export_wins__wins_dataset-__-0e6f01a3-98be... 0.999772 \n", - "313539 dit_export_wins__wins_dataset-__-0e714516-011d... 0.999436 \n", - "313540 dit_export_wins__wins_dataset-__-0e714516-011d... 0.999436 \n", - "313541 dit_export_wins__wins_dataset-__-0e788c34-27d6... 0.999768 \n", - "\n", - "[313542 rows x 7 columns]" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "lookup.df()" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "id": "05bb1d3b-3c49-48bb-85d1-58a1dccf1cbc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────────────────────────┬──────────────────────────────────┐\n", - "│ company_name │ company_name │\n", - "│ varchar │ varchar │\n", - "├──────────────────────────────────┼──────────────────────────────────┤\n", - "│ MICRO:BIT EDUCATIONAL FOUNDATION │ MICRO:BIT EDUCATIONAL FOUNDATION │\n", - "│ TSL PROFESSIONAL PRODUCTS LTD. │ TSL Professional Products Ltd │\n", - "│ FORTIS TECHNOLOGIES LIMITED │ FORTIS TECHNOLOGIES LIMITED │\n", - "│ RAILD IMPORTS LIMITED │ Raild Imports Ltd │\n", - "│ DOMINO UK LIMITED │ Domino UK Ltd │\n", - "│ PS DISTRIBUTION LTD │ PS DISTRIBUTION LTD │\n", - "│ WILD LIFE WORLD LIMITED │ Wild Life World │\n", - "│ PEAK COMMUNICATIONS LIMITED │ Peak Communications Limited │\n", - "│ ANYWHERE WI-FI LIMITED │ ANYWHERE WI-FI LIMITED │\n", - "│ THERMOTEKNIX SYSTEMS LIMITED │ Thermoteknix Systems Ltd │\n", - "│ · │ · │\n", - "│ · │ · │\n", - "│ · │ · │\n", - "│ RYSE ENERGY (UK) LIMITED │ RYSE ENERGY (UK) LIMITED │\n", - "│ KOBUS SERVICES LIMITED │ KOBUS SERVICES LIMITED │\n", - "│ FABFUNKY LTD │ Fabfunky Ltd │\n", - "│ FABFUNKY LTD │ Fabfunky Ltd │\n", - "│ SEA-BAND LIMITED │ SEA-BAND LIMITED │\n", - "│ TEMPLE FORTUNE LTD │ Temple Fortune LTD │\n", - "│ AERLOOM LONDON LIMITED │ AERLOOM LONDON LIMITED │\n", - "│ RYSE ENERGY (UK) LIMITED │ Ryse Energy │\n", - "│ TEMPLE FORTUNE LTD │ Temple Fortune LTD │\n", - "│ ROMA PRAMS LIMITED │ Roma Prams Limited │\n", - "├──────────────────────────────────┴──────────────────────────────────┤\n", - "│ 8568 rows (20 shown) 2 columns │\n", - "└─────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 100, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " dh.company_name,\n", - " ew.company_name\n", - " from\n", - " lookup lookup\n", - " inner join df_dh dh on\n", - " lookup.source_id = dh.unique_id and\n", - " lookup.source = 'dit_data_hub__companies'\n", - " inner join df_ew ew on\n", - " lookup.target_id = ew.unique_id and\n", - " lookup.target = 'dit_export_wins__wins_dataset'\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "66607f98-d895-4f53-b1c5-f0324beff981", - "metadata": {}, - "source": [ - "## Lookup testing" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "b254f876-d57c-4094-b93c-c0872236cd8a", - "metadata": {}, - "outputs": [], - "source": [ - "lookup = duckdb.sql(\"\"\"\n", - " select\n", - " source_dataset_l as source,\n", - " unique_id_l as source_id,\n", - " cluster_id_l as source_cluster,\n", - " source_dataset_r as target,\n", - " unique_id_r as target_id,\n", - " cluster_id_r as target_cluster,\n", - " match_probability\n", - " from\n", - " df_clusters\n", - " union\n", - " select\n", - " source_dataset_r as source,\n", - " unique_id_r as source_id,\n", - " cluster_id_r as source_cluster,\n", - " source_dataset_l as target,\n", - " unique_id_l as target_id,\n", - " cluster_id_l as target_cluster,\n", - " match_probability\n", - " from\n", - " df_clusters\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "69b6208a-8973-4409-a23a-363100af75cb", - "metadata": {}, - "source": [ - "21/6: This seems like the best selection method for joining an eventual lookup. This RETAINS duplicates. If there are lots of export wins in the target dataset, every one is getting returned. " - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "d5eb8416-c837-4e11-bffc-b10f2ebc90fa", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(100273, 4)" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "┌───────────┬─────────────────────────────────────────────┬─────────────────────────────────────────────┬─────────┐\n", - "│ unique_id │ ch_name │ dh_name │ ew_name │\n", - "│ varchar │ varchar │ varchar │ varchar │\n", - "├───────────┼─────────────────────────────────────────────┼─────────────────────────────────────────────┼─────────┤\n", - "│ 01341280 │ T.E.L. ENGINEERING LIMITED │ T.E.L. ENGINEERING LIMITED │ NULL │\n", - "│ 02925653 │ CORPORATE DOCUMENT SERVICES LIMITED │ CORPORATE DOCUMENT SERVICES LIMITED │ NULL │\n", - "│ 04650763 │ KEYSTONE LAW LIMITED │ KEYSTONE LAW LIMITED │ NULL │\n", - "│ 05517238 │ DESCOMED LIMITED │ DESCOMED LIMITED │ NULL │\n", - "│ 05912338 │ IMA (KLESSMANN) UK LTD │ IMA (KLESSMANN) UK LTD │ NULL │\n", - "│ 07171071 │ HS VENTURES LTD │ HS VENTURES LTD │ NULL │\n", - "│ 07661388 │ PR AGENCY ONE LTD │ PR AGENCY ONE LTD │ NULL │\n", - "│ 08267996 │ DRINKWELL BEVERAGES LIMITED │ DRINKWELL BEVERAGES LIMITED │ NULL │\n", - "│ 08355388 │ ALLIED PROTEK ENGINEERING SOLUTIONS LIMITED │ ALLIED PROTEK ENGINEERING SOLUTIONS LIMITED │ NULL │\n", - "│ 08813662 │ VIKING SYSTEMS LIMITED │ VIKING SYSTEMS LIMITED │ NULL │\n", - "│ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │\n", - "│ 08452243 │ L WOOLLARD LTD │ NULL │ NULL │\n", - "│ 07536706 │ RBM MEDIA LIMITED │ NULL │ NULL │\n", - "│ 04957037 │ ABBEYDEAN LIMITED │ NULL │ NULL │\n", - "│ 12888136 │ BAKA TRANS LIMITED │ NULL │ NULL │\n", - "│ 08018267 │ CLAIRE WILSON CONSULTING LIMITED │ NULL │ NULL │\n", - "│ 08096083 │ IN CAR PRODUCTS LIMITED │ NULL │ NULL │\n", - "│ 10199031 │ VIA PROPERTIES DEVELOPMENTS LTD │ NULL │ NULL │\n", - "│ SC701232 │ PARKDALE HOLDINGS LTD │ NULL │ NULL │\n", - "│ 10737855 │ COLONIAL RECRUITMENT LONDON LIMITED │ NULL │ NULL │\n", - "│ 05046203 │ GREENKEY PROPERTIES LIMITED │ NULL │ NULL │\n", - "├───────────┴─────────────────────────────────────────────┴─────────────────────────────────────────────┴─────────┤\n", - "│ ? rows (>9999 rows, 20 shown) 4 columns │\n", - "└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "join_with_dupes = duckdb.sql(\"\"\"\n", - " select\n", - " ch.unique_id,\n", - " ch.company_name as ch_name,\n", - " dh.company_name as dh_name,\n", - " ew.company_name as ew_name\n", - " from (\n", - " select \n", - " *\n", - " from\n", - " lookup lookup\n", - " where\n", - " lookup.source = 'companieshouse_companies'\n", - " and lookup.target in (\n", - " 'dit_data_hub__companies',\n", - " 'dit_export_wins__wins_dataset'\n", - " )\n", - " ) lookup\n", - " right outer join df_ch ch on\n", - " lookup.source_id = ch.unique_id \n", - " and lookup.source = 'companieshouse_companies'\n", - " left join df_dh dh on\n", - " lookup.target_id = dh.unique_id \n", - " and lookup.target = 'dit_data_hub__companies'\n", - " left join df_ew ew on\n", - " lookup.target_id = ew.unique_id\n", - " and lookup.target = 'dit_export_wins__wins_dataset'\n", - "\"\"\")\n", - "\n", - "join_with_dupes.df().shape\n", - "join_with_dupes" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "id": "5ef6adf2-287a-427b-bebc-a067219a4ccb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "100000" - ] - }, - "execution_count": 120, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "dh_name 972\n", - "ew_name 369\n", - "dtype: int64" - ] - }, - "execution_count": 120, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "join_with_dupes.df()['unique_id'].nunique()\n", - "join_with_dupes.df()[['dh_name', 'ew_name']].notnull().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 122, - "id": "62606ad8-5654-4325-9761-576be111e8f7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────────┬──────────────┐\n", - "│ unique_id │ count_star() │\n", - "│ varchar │ int64 │\n", - "├───────────┼──────────────┤\n", - "│ 04338382 │ 114 │\n", - "│ 01243967 │ 18 │\n", - "│ 03947927 │ 12 │\n", - "│ 04501699 │ 9 │\n", - "│ 02122174 │ 8 │\n", - "└───────────┴──────────────┘" - ] - }, - "execution_count": 122, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " unique_id,\n", - " count(*)\n", - " from\n", - " join_with_dupes\n", - " group by\n", - " unique_id\n", - " having\n", - " count(*) > 1\n", - " order by\n", - " count(*) desc\n", - " limit\n", - " 5\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "id": "aa1694c0-6101-4619-81ca-7d4af6b482de", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────────────────┬───────────┬───────────────────────────────┬──────────────────────────────────────┐\n", - "│ source │ source_id │ target │ target_id │\n", - "│ varchar │ varchar │ varchar │ varchar │\n", - "├──────────────────────────┼───────────┼───────────────────────────────┼──────────────────────────────────────┤\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 92a9911a-8b61-4353-a892-af7d0c350dd2 │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 45d7793c-1506-4ddf-a1fd-757ce81c0d1f │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 2bafbb00-bdea-4227-b64e-4813ea1b9257 │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ e33a2dc7-ffb5-4441-be77-5e5c7147f9dd │\n", - "│ companieshouse_companies │ 04338382 │ hmrc_trade__exporters │ 1642517 │\n", - "│ companieshouse_companies │ 04338382 │ hmrc_trade__exporters │ 2190502 │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ df4ab573-7845-4944-a2ad-40591b43e49d │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 01791539-0425-4e66-9b5d-689de852c5ad │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 9af6062e-fd06-4bb8-ae5e-b22035ed1b2f │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 97bcb9c9-51ca-4d24-89be-a382e1605366 │\n", - "│ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ b1c5181d-b10c-4e3a-98dc-2c7a827391b3 │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 80bfd1ac-13da-4727-866d-6d345891ccc0 │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 354a5653-d6ec-4ae7-b539-eeba3f8d5f7b │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 75475bd7-b1c0-4f20-957f-186fe24b8837 │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 14d0844d-77f4-4474-a30f-44285f4e9da7 │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 167b5651-37f6-4dc2-af15-401f077fa498 │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 3fd57eec-2d5d-4b1d-96ea-f51f28a15a48 │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 42fa45d9-be44-4b51-b2fc-c504cdead8b6 │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ a8cc0d40-21aa-4164-9045-216449be18bc │\n", - "│ companieshouse_companies │ 04338382 │ dit_export_wins__wins_dataset │ 5ee1a42d-f864-46a6-9902-c9a81c1839ef │\n", - "├──────────────────────────┴───────────┴───────────────────────────────┴──────────────────────────────────────┤\n", - "│ 116 rows (20 shown) 4 columns │\n", - "└─────────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 123, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " source, source_id, target, target_id\n", - " from\n", - " lookup\n", - " where\n", - " source_id = '04338382'\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 124, - "id": "19636e09-2b37-4be1-a097-b72e8c59653d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────────────────────────────┬────────────────┬─────────────────┬─────────────────┬──────────┐\n", - "│ unique_id │ company_number │ company_name │ secondary_names │ postcode │\n", - "│ varchar │ varchar │ varchar │ int32 │ int32 │\n", - "├──────────────────────────────────────┼────────────────┼─────────────────┼─────────────────┼──────────┤\n", - "│ 4db14954-4396-43a4-b8bd-86b1f0c010c7 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ bcba2b26-c263-4653-9ec9-b33cf8e5b27a │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ 24c0c65f-66ab-446d-bde3-83822098c644 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ 3900dcae-80a7-40c7-bd5f-322603b02198 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ eb2ecb03-ed37-4f0d-ad5e-220a1201b5a8 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ 2cc0f862-83b9-4123-bc38-27451efa1be9 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ e33a2dc7-ffb5-4441-be77-5e5c7147f9dd │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ 80b0fd21-7644-4c17-900a-3c56e92d47f6 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ 61eb3d6d-5128-4dda-832f-f6276b7d7f5c │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ 15734a3a-03fb-4d62-9eda-4d77a043aff3 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │\n", - "│ 92a9911a-8b61-4353-a892-af7d0c350dd2 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ 80bfd1ac-13da-4727-866d-6d345891ccc0 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ b1c5181d-b10c-4e3a-98dc-2c7a827391b3 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ c8ce120d-0405-4bbb-95f4-569e14979c08 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ 5114a667-5514-4a01-97a3-754c37c25614 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ a52ff3b1-fd26-4bfd-ab39-d7b06389531d │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ 2bafbb00-bdea-4227-b64e-4813ea1b9257 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ 55fcd382-521b-4f6b-b438-323d21e66f57 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ f1a3aa0e-2e3e-4d86-ba84-52d33ac56d97 │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "│ e5f1da00-4596-4a44-ae87-732315c33cca │ 04338382 │ Ruark Audio Ltd │ NULL │ NULL │\n", - "├──────────────────────────────────────┴────────────────┴─────────────────┴─────────────────┴──────────┤\n", - "│ 114 rows (20 shown) 5 columns │\n", - "└──────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 124, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " *\n", - " from\n", - " df_ew\n", - " where\n", - " unique_id in (\n", - " select\n", - " target_id\n", - " from\n", - " lookup\n", - " where\n", - " source_id = '04338382'\n", - " and target = 'dit_export_wins__wins_dataset'\n", - " )\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "3e70f810-67ca-4b3e-9c67-a6d6f8d5d37d", - "metadata": {}, - "source": [ - "21/6: Here's my attempt with deduplication. For wins this makes little sense. For companies house to data hub, it makes loads -- you only want the match with the highest probability. Note we do this using clusters as the ID is unique to the row." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "2d2a7411-0d3b-42af-9ed8-f111a9612258", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(100000, 4)" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "┌───────────┬─────────────────────────────────────┬─────────────────────────────────────────────┬──────────────────────┐\n", - "│ unique_id │ ch_name │ dh_name │ ew_name │\n", - "│ varchar │ varchar │ varchar │ varchar │\n", - "├───────────┼─────────────────────────────────────┼─────────────────────────────────────────────┼──────────────────────┤\n", - "│ 01341280 │ T.E.L. ENGINEERING LIMITED │ T.E.L. ENGINEERING LIMITED │ NULL │\n", - "│ 01910675 │ QUARTEX COMPONENTS LIMITED │ QUARTEX COMPONENTS LIMITED │ NULL │\n", - "│ 02925653 │ CORPORATE DOCUMENT SERVICES LIMITED │ CORPORATE DOCUMENT SERVICES LIMITED │ NULL │\n", - "│ 04650763 │ KEYSTONE LAW LIMITED │ KEYSTONE LAW LIMITED │ KEYSTONE LAW LIMITED │\n", - "│ 05912338 │ IMA (KLESSMANN) UK LTD │ IMA (KLESSMANN) UK LTD │ NULL │\n", - "│ 07171071 │ HS VENTURES LTD │ HS VENTURES LTD │ NULL │\n", - "│ 07661388 │ PR AGENCY ONE LTD │ PR AGENCY ONE LTD │ NULL │\n", - "│ 08267996 │ DRINKWELL BEVERAGES LIMITED │ DRINKWELL BEVERAGES LIMITED │ NULL │\n", - "│ 08355388 │ ALLIED PROTEK ENGINEERING SOLUTIO… │ ALLIED PROTEK ENGINEERING SOLUTIONS LIMITED │ NULL │\n", - "│ 08813662 │ VIKING SYSTEMS LIMITED │ VIKING SYSTEMS LIMITED │ Viking Systems Ltd. │\n", - "│ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │\n", - "│ 12454032 │ JASPER JENNINGS LIMITED │ NULL │ NULL │\n", - "│ 14362422 │ SEDLEY LIMITED │ NULL │ NULL │\n", - "│ 04315362 │ ROSSINI SERVICES LIMITED │ NULL │ NULL │\n", - "│ 14239345 │ CLAIRE BYRNE SOCIAL WORK LIMITED │ NULL │ NULL │\n", - "│ NI680889 │ K & R CARS LTD │ NULL │ NULL │\n", - "│ 10912982 │ 81A ALBERT BRIDGE ROAD FREEHOLD L… │ NULL │ NULL │\n", - "│ 13014537 │ HUGO MASCIE-TAYLOR CONSULTING LIM… │ NULL │ NULL │\n", - "│ 08333921 │ BLACKSTAR EQUITIES LIMITED │ NULL │ NULL │\n", - "│ 11336418 │ AASOG LIMITED │ NULL │ NULL │\n", - "│ 14554807 │ WOOD LETTINGS LTD │ NULL │ NULL │\n", - "├───────────┴─────────────────────────────────────┴─────────────────────────────────────────────┴──────────────────────┤\n", - "│ ? rows (>9999 rows, 20 shown) 4 columns │\n", - "└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "join_no_dupes = duckdb.sql(\"\"\"\n", - " select\n", - " ch.unique_id,\n", - " ch.company_name as ch_name,\n", - " dh.company_name as dh_name,\n", - " ew.company_name as ew_name\n", - " from (\n", - " select\n", - " source,\n", - " source_id,\n", - " array_agg(target) as target, \n", - " array_agg(target_id) as target_id\n", - " from (\n", - " select distinct on (\n", - " lookup.source_id, \n", - " lookup.target,\n", - " lookup.target_cluster\n", - " )\n", - " *\n", - " from\n", - " lookup lookup\n", - " where\n", - " lookup.source = 'companieshouse_companies'\n", - " and lookup.target in (\n", - " 'dit_data_hub__companies',\n", - " 'dit_export_wins__wins_dataset'\n", - " )\n", - " order by\n", - " lookup.source_id, \n", - " lookup.target,\n", - " lookup.target_cluster,\n", - " lookup.match_probability desc\n", - " ) lookup\n", - " where\n", - " lookup.source = 'companieshouse_companies'\n", - " and lookup.target in (\n", - " 'dit_data_hub__companies',\n", - " 'dit_export_wins__wins_dataset'\n", - " )\n", - " group by\n", - " source,\n", - " source_id\n", - " ) lookup\n", - " right join df_ch ch on\n", - " lookup.source_id = ch.unique_id \n", - " and lookup.source = 'companieshouse_companies'\n", - " left join df_dh dh on\n", - " array_has(lookup.target_id, dh.unique_id)\n", - " and array_has(lookup.target, 'dit_data_hub__companies')\n", - " left join df_ew ew on\n", - " array_has(lookup.target_id, ew.unique_id)\n", - " and array_has(lookup.target, 'dit_export_wins__wins_dataset')\n", - "\"\"\")\n", - "\n", - "join_no_dupes.df().shape\n", - "join_no_dupes" - ] - }, - { - "cell_type": "markdown", - "id": "2f96e621-5202-4a36-ad36-cf0610ed72f4", - "metadata": {}, - "source": [ - "## Joining experiments" - ] - }, - { - "cell_type": "markdown", - "id": "49b901e1-3952-4bd2-bbac-6ee8970b511d", - "metadata": {}, - "source": [ - "`03104628` is a good test case." - ] - }, - { - "cell_type": "code", - "execution_count": 152, - "id": "dcea0fa4-6bb6-43be-9e91-b2bcff1923a9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'companieshouse_companies-__-03104628'" - ] - }, - "execution_count": 152, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " source_cluster\n", - " from\n", - " lookup\n", - " where\n", - " source_id = '03104628'\n", - "\"\"\").df().iloc[0,0]" - ] - }, - { - "cell_type": "code", - "execution_count": 153, - "id": "744df402-f087-4602-815a-a7dea9c87400", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────────────┬──────────────────────┬───┬──────────────────────┬──────────────────────┬────────────────────┐\n", - "│ source │ source_id │ … │ target_id │ target_cluster │ match_probability │\n", - "│ varchar │ varchar │ │ varchar │ varchar │ double │\n", - "├──────────────────────┼──────────────────────┼───┼──────────────────────┼──────────────────────┼────────────────────┤\n", - "│ companieshouse_com… │ 03104628 │ … │ 729e4a59-ec8e-46b6… │ companieshouse_com… │ 0.9999652872525622 │\n", - "│ companieshouse_com… │ 03104628 │ … │ 2a64728a-1afa-4121… │ companieshouse_com… │ 0.999999933251573 │\n", - "│ dit_data_hub__comp… │ 2a64728a-1afa-4121… │ … │ 729e4a59-ec8e-46b6… │ companieshouse_com… │ 0.9999652872525622 │\n", - "│ dit_export_wins__w… │ 729e4a59-ec8e-46b6… │ … │ 03104628 │ companieshouse_com… │ 0.9999652872525622 │\n", - "│ dit_data_hub__comp… │ 2a64728a-1afa-4121… │ … │ 03104628 │ companieshouse_com… │ 0.999999933251573 │\n", - "│ dit_export_wins__w… │ 729e4a59-ec8e-46b6… │ … │ 2a64728a-1afa-4121… │ companieshouse_com… │ 0.9999652872525622 │\n", - "├──────────────────────┴──────────────────────┴───┴──────────────────────┴──────────────────────┴────────────────────┤\n", - "│ 6 rows 7 columns (5 shown) │\n", - "└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 153, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " *\n", - " from\n", - " lookup\n", - " where\n", - " source_cluster = 'companieshouse_companies-__-03104628'\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 174, - "id": "adc7b0c2-ec12-4fad-acda-1a4159419b5e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────────┬──────────────────────────────────────────────────────────┬─────────────────────────────────────┬─────────┐\n", - "│ unique_id │ ch_name │ dh_name │ ew_name │\n", - "│ varchar │ varchar │ varchar │ varchar │\n", - "├───────────┼──────────────────────────────────────────────────────────┼─────────────────────────────────────┼─────────┤\n", - "│ 00591960 │ CALDER OILS LIMITED │ CALDER OILS LIMITED │ NULL │\n", - "│ 02926804 │ CHIEF PRODUCTIONS LIMITED │ CHIEF PRODUCTIONS LIMITED │ NULL │\n", - "│ 05325357 │ ONE STOP PROMOTIONS LIMITED │ ONE STOP PROMOTIONS LIMITED │ NULL │\n", - "│ 05537361 │ CONCRETE CANVAS LIMITED │ CONCRETE CANVAS LIMITED │ NULL │\n", - "│ 05576852 │ MINDRAY (UK) LIMITED │ MINDRAY (UK) LIMITED │ NULL │\n", - "│ 07735930 │ LAZARUS TRAINING LTD │ LAZARUS TRAINING LTD │ NULL │\n", - "│ 07928073 │ THRIVE THERAPEUTIC SOFTWARE LIMITED │ THRIVE THERAPEUTIC SOFTWARE LIMITED │ NULL │\n", - "│ 08155213 │ PIING LIMITED │ PIING GROUP LIMITED │ NULL │\n", - "│ 09182461 │ FREE RUNNING BUILDINGS LIMITED │ FREE RUNNING BUILDINGS LIMITED │ NULL │\n", - "│ 10962926 │ TREASURED TIMES LIMITED │ TREASURED TIMES LIMITED │ NULL │\n", - "│ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │\n", - "│ 02184638 │ YORK IMPORTS │ NULL │ NULL │\n", - "│ 14586231 │ BABYLON SHAWARMA LTD │ NULL │ NULL │\n", - "│ 05280253 │ DE MONTFORT PARK (ASHFORD - PHASE 1) MANAGEMENT COMPAN… │ NULL │ NULL │\n", - "│ 13680329 │ RS AUTO LEAD LTD │ NULL │ NULL │\n", - "│ 09515287 │ K EUROPEAN LIMITED │ NULL │ NULL │\n", - "│ 09846830 │ A.J. CORNALL (HOLDINGS) LIMITED │ NULL │ NULL │\n", - "│ 04257948 │ THE TRAINING & DEVELOPMENT CONSULTANCY LTD │ NULL │ NULL │\n", - "│ 10405944 │ SW PROPERTIES (NW) LTD │ NULL │ NULL │\n", - "│ 11298808 │ BEETON CONSULTING LIMITED │ NULL │ NULL │\n", - "│ 12701688 │ ABP MIDCO UK HOLDINGS LIMITED │ NULL │ NULL │\n", - "├───────────┴──────────────────────────────────────────────────────────┴─────────────────────────────────────┴─────────┤\n", - "│ ? rows (>9999 rows, 20 shown) 4 columns │\n", - "└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 174, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " ch.unique_id,\n", - " ch.company_name as ch_name,\n", - " dh.company_name as dh_name,\n", - " ew.company_name as ew_name\n", - " from (\n", - " select\n", - " source,\n", - " source_id,\n", - " array_agg(target) as target, \n", - " array_agg(target_id) as target_id\n", - " from (\n", - " select distinct on (\n", - " lookup.source_id, \n", - " lookup.target,\n", - " lookup.target_cluster\n", - " )\n", - " *\n", - " from\n", - " lookup lookup\n", - " where\n", - " lookup.source = 'companieshouse_companies'\n", - " and lookup.target in (\n", - " 'dit_data_hub__companies',\n", - " 'dit_export_wins__wins_dataset'\n", - " )\n", - " order by\n", - " lookup.source_id, \n", - " lookup.target,\n", - " lookup.target_cluster,\n", - " lookup.match_probability desc\n", - " ) lookup\n", - " where\n", - " lookup.source = 'companieshouse_companies'\n", - " and lookup.target in (\n", - " 'dit_data_hub__companies',\n", - " 'dit_export_wins__wins_dataset'\n", - " )\n", - " group by\n", - " source,\n", - " source_id\n", - " ) lookup\n", - " right join df_ch ch on\n", - " lookup.source_id = ch.unique_id \n", - " and lookup.source = 'companieshouse_companies'\n", - " left join df_dh dh on\n", - " array_has(lookup.target_id, dh.unique_id)\n", - " and array_has(lookup.target, 'dit_data_hub__companies')\n", - " left join df_ew ew on\n", - " array_has(lookup.target_id, ew.unique_id)\n", - " and array_has(lookup.target, 'dit_export_wins__wins_dataset')\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 127, - "id": "436df50d-0316-44e2-ac2a-287982514f82", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(100068, 4)" - ] - }, - "execution_count": 127, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "┌───────────┬───────────────────────────────────────────────┬─────────────────────────────────────┬─────────┐\n", - "│ unique_id │ ch_name │ dh_name │ ew_name │\n", - "│ varchar │ varchar │ varchar │ varchar │\n", - "├───────────┼───────────────────────────────────────────────┼─────────────────────────────────────┼─────────┤\n", - "│ 00591960 │ CALDER OILS LIMITED │ CALDER OILS LIMITED │ NULL │\n", - "│ 02926804 │ CHIEF PRODUCTIONS LIMITED │ CHIEF PRODUCTIONS LIMITED │ NULL │\n", - "│ 05325357 │ ONE STOP PROMOTIONS LIMITED │ ONE STOP PROMOTIONS LIMITED │ NULL │\n", - "│ 05537361 │ CONCRETE CANVAS LIMITED │ CONCRETE CANVAS LIMITED │ NULL │\n", - "│ 05576852 │ MINDRAY (UK) LIMITED │ MINDRAY (UK) LIMITED │ NULL │\n", - "│ 07073880 │ HEAR 4 U AND HEALTHSCREEN LIMITED │ HEAR 4 U AND HEALTHSCREEN LIMITED │ NULL │\n", - "│ 07735930 │ LAZARUS TRAINING LTD │ LAZARUS TRAINING LTD │ NULL │\n", - "│ 07928073 │ THRIVE THERAPEUTIC SOFTWARE LIMITED │ THRIVE THERAPEUTIC SOFTWARE LIMITED │ NULL │\n", - "│ 08155213 │ PIING LIMITED │ PIING GROUP LIMITED │ NULL │\n", - "│ 09182461 │ FREE RUNNING BUILDINGS LIMITED │ FREE RUNNING BUILDINGS LIMITED │ NULL │\n", - "│ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │\n", - "│ 08462178 │ SOLOMON KEY PUBLISHING LIMITED │ NULL │ NULL │\n", - "│ 05105346 │ CAPITAL PROPERTIES SOLUTIONS LIMITED │ NULL │ NULL │\n", - "│ 13638716 │ AKSH BUILDING SERVICES LTD │ NULL │ NULL │\n", - "│ 14707380 │ H & S MARKETING LIMITED │ NULL │ NULL │\n", - "│ 14132794 │ RESOURCE LABOUR SUPPLY LTD │ NULL │ NULL │\n", - "│ 07311410 │ PROMOSEO LTD │ NULL │ NULL │\n", - "│ 06445687 │ BUCKINGHAM PLUMBING LIMITED │ NULL │ NULL │\n", - "│ 11875845 │ T4C CONTRACTORS LTD │ NULL │ NULL │\n", - "│ 13325715 │ DERRY HILL MENSTON MANAGEMENT COMPANY LIMITED │ NULL │ NULL │\n", - "│ 02102349 │ ANCHOR DOOR SYSTEMS LIMITED │ NULL │ NULL │\n", - "├───────────┴───────────────────────────────────────────────┴─────────────────────────────────────┴─────────┤\n", - "│ ? rows (>9999 rows, 20 shown) 4 columns │\n", - "└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 127, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "join_no_dupes = duckdb.sql(\"\"\"\n", - " select\n", - " ch.unique_id,\n", - " ch.company_name as ch_name,\n", - " dh.company_name as dh_name,\n", - " ew.company_name as ew_name\n", - " from (\n", - " select distinct on (\n", - " lookup.source_id, \n", - " lookup.target,\n", - " lookup.target_cluster\n", - " )\n", - " *\n", - " from\n", - " lookup lookup\n", - " where\n", - " lookup.source = 'companieshouse_companies'\n", - " and lookup.target in (\n", - " 'dit_data_hub__companies',\n", - " 'dit_export_wins__wins_dataset'\n", - " )\n", - " order by\n", - " lookup.source_id, \n", - " lookup.target,\n", - " lookup.target_cluster,\n", - " lookup.match_probability desc\n", - " ) lookup\n", - " right outer join df_ch ch on\n", - " lookup.source_id = ch.unique_id \n", - " and lookup.source = 'companieshouse_companies'\n", - " left join df_dh dh on\n", - " lookup.target_id = dh.unique_id \n", - " and lookup.target = 'dit_data_hub__companies'\n", - " left join df_ew ew on\n", - " lookup.target_id = ew.unique_id\n", - " and lookup.target = 'dit_export_wins__wins_dataset'\n", - "\"\"\")\n", - "\n", - "join_no_dupes.df().shape\n", - "join_no_dupes" - ] - }, - { - "cell_type": "code", - "execution_count": 128, - "id": "9164eb42-21b0-4b01-a86b-23dbe6c8c72a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "100000" - ] - }, - "execution_count": 128, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "dh_name 965\n", - "ew_name 110\n", - "dtype: int64" - ] - }, - "execution_count": 128, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "join_no_dupes.df()['unique_id'].nunique()\n", - "join_no_dupes.df()[['dh_name', 'ew_name']].notnull().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 129, - "id": "e36eb73a-2132-4efb-b192-6bfdf8de43f3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────────┬──────────────┐\n", - "│ unique_id │ count_star() │\n", - "│ varchar │ int64 │\n", - "├───────────┼──────────────┤\n", - "│ 03104628 │ 2 │\n", - "│ 05191341 │ 2 │\n", - "│ 03643009 │ 2 │\n", - "│ 05939666 │ 2 │\n", - "│ 04080825 │ 2 │\n", - "└───────────┴──────────────┘" - ] - }, - "execution_count": 129, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " unique_id,\n", - " count(*)\n", - " from\n", - " join_no_dupes\n", - " group by\n", - " unique_id\n", - " having\n", - " count(*) > 1\n", - " order by\n", - " count(*) desc\n", - " limit\n", - " 5\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 130, - "id": "bfb0e9d3-6d45-4e5c-aa99-be250e16d5f2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────────────────┬───────────┬───────────────────────────────┬──────────────────────────────────────┐\n", - "│ source │ source_id │ target │ target_id │\n", - "│ varchar │ varchar │ varchar │ varchar │\n", - "├──────────────────────────┼───────────┼───────────────────────────────┼──────────────────────────────────────┤\n", - "│ companieshouse_companies │ 03104628 │ dit_export_wins__wins_dataset │ 729e4a59-ec8e-46b6-a9b4-f0854cd61cd2 │\n", - "│ companieshouse_companies │ 03104628 │ dit_data_hub__companies │ 2a64728a-1afa-4121-8a50-16a826c7a449 │\n", - "└──────────────────────────┴───────────┴───────────────────────────────┴──────────────────────────────────────┘" - ] - }, - "execution_count": 130, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " source, source_id, target, target_id\n", - " from\n", - " lookup\n", - " where\n", - " source_id = '03104628'\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "f9d8d12f-cbe4-4413-97f0-b7d9cea411a6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────────────────────────────┬──────────────────────────────────────┬──────────────────────────────────────┐\n", - "│ target │ target_id │ target_cluster │\n", - "│ varchar │ varchar │ varchar │\n", - "├───────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┤\n", - "│ dit_data_hub__companies │ 29a63f85-d175-e711-b809-e4115bead28a │ companieshouse_companies-__-05473995 │\n", - "│ dit_export_wins__wins_dataset │ 5540b265-bc14-42b8-a86f-c9e8fe8fac26 │ companieshouse_companies-__-05473995 │\n", - "└───────────────────────────────┴──────────────────────────────────────┴──────────────────────────────────────┘" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " target,\n", - " target_id,\n", - " target_cluster\n", - " from\n", - " lookup\n", - " where\n", - " source = 'companieshouse_companies'\n", - " and source_id = '05473995'\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 250, - "id": "c982ff7e-2d0d-4843-b1a7-80509e9c92c1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────────────────────────────┬────────────────┬─────────────────┬─────────────────┬──────────┐\n", - "│ unique_id │ company_number │ company_name │ secondary_names │ postcode │\n", - "│ varchar │ varchar │ varchar │ varchar │ varchar │\n", - "├──────────────────────────────────────┼────────────────┼─────────────────┼─────────────────┼──────────┤\n", - "│ c563b6d0-c9d0-4807-abc2-50924e0fd187 │ 04934116 │ IGENNUS LIMITED │ │ │\n", - "│ 7225951f-a78e-45ea-9227-b19f8f547609 │ 04934116 │ IGENNUS LIMITED │ │ │\n", - "│ 5762ed98-c51e-4371-b80f-5133130ffdb2 │ 04934116 │ IGENNUS LIMITED │ │ │\n", - "│ ce6d9e46-643e-4679-bc93-c7ff12d1f822 │ 04934116 │ IGENNUS LIMITED │ │ │\n", - "└──────────────────────────────────────┴────────────────┴─────────────────┴─────────────────┴──────────┘" - ] - }, - "execution_count": 250, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " *\n", - " from\n", - " df_ew\n", - " where\n", - " unique_id in (\n", - " '5762ed98-c51e-4371-b80f-5133130ffdb2',\n", - " '7225951f-a78e-45ea-9227-b19f8f547609',\n", - " 'c563b6d0-c9d0-4807-abc2-50924e0fd187',\n", - " 'ce6d9e46-643e-4679-bc93-c7ff12d1f822'\n", - " )\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 236, - "id": "e5f42a92-08a9-4cea-bc3f-ef39ebe5f306", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
unique_idcompany_numbercompany_namesecondary_namescompany_statusaccount_categoryaddress_line_1address_line_2post_towncountycountrypostcodesic_code_1sic_code_2sic_code_3sic_code_4
999951321207913212079JCS TRANSPORT AUDITING SERVICES LTD[]ActiveMICRO ENTITYWESLEY HHOUSEBULL HILLLEATHERHEADENGLANDKT22 7AH70229 - Management consultancy activities othe...
9999609593695095936956 AND 6A QUADRANT ROAD MANAGEMENT COMPANY LTD.[]ActiveMICRO ENTITYFLAT 26A QUADRANT ROADTHORNTON HEATHENGLANDCR7 7DA98000 - Residents property management
999971109849211098492CJ WEBB PROPERTY LIMITED[]ActiveUNAUDITED ABRIDGED19-20 BOURNE COURTSOUTHEND ROADWOODFORD GREENESSEXUNITED KINGDOMIG8 8HD68209 - Other letting and operating of own or ...
999981385460413854604DB CAPTURES LIMITED[]Active - Proposal to Strike offNO ACCOUNTS FILED154 STERLING GARDENSLONDONENGLANDSE14 6DZ74202 - Other specialist photography
999990730456007304560YOGA & YOU LIMITED[]ActiveDORMANT43 VICTORIA ROADDARLINGTONCOUNTY DURHAMDL1 5SF99999 - Dormant Company
\n", - "
" - ], - "text/plain": [ - " unique_id company_number \\\n", - "99995 13212079 13212079 \n", - "99996 09593695 09593695 \n", - "99997 11098492 11098492 \n", - "99998 13854604 13854604 \n", - "99999 07304560 07304560 \n", - "\n", - " company_name secondary_names \\\n", - "99995 JCS TRANSPORT AUDITING SERVICES LTD [] \n", - "99996 6 AND 6A QUADRANT ROAD MANAGEMENT COMPANY LTD. [] \n", - "99997 CJ WEBB PROPERTY LIMITED [] \n", - "99998 DB CAPTURES LIMITED [] \n", - "99999 YOGA & YOU LIMITED [] \n", - "\n", - " company_status account_category \\\n", - "99995 Active MICRO ENTITY \n", - "99996 Active MICRO ENTITY \n", - "99997 Active UNAUDITED ABRIDGED \n", - "99998 Active - Proposal to Strike off NO ACCOUNTS FILED \n", - "99999 Active DORMANT \n", - "\n", - " address_line_1 address_line_2 post_town county \\\n", - "99995 WESLEY HHOUSE BULL HILL LEATHERHEAD \n", - "99996 FLAT 2 6A QUADRANT ROAD THORNTON HEATH \n", - "99997 19-20 BOURNE COURT SOUTHEND ROAD WOODFORD GREEN ESSEX \n", - "99998 154 STERLING GARDENS LONDON \n", - "99999 43 VICTORIA ROAD DARLINGTON COUNTY DURHAM \n", - "\n", - " country postcode \\\n", - "99995 ENGLAND KT22 7AH \n", - "99996 ENGLAND CR7 7DA \n", - "99997 UNITED KINGDOM IG8 8HD \n", - "99998 ENGLAND SE14 6DZ \n", - "99999 DL1 5SF \n", - "\n", - " sic_code_1 sic_code_2 \\\n", - "99995 70229 - Management consultancy activities othe... \n", - "99996 98000 - Residents property management \n", - "99997 68209 - Other letting and operating of own or ... \n", - "99998 74202 - Other specialist photography \n", - "99999 99999 - Dormant Company \n", - "\n", - " sic_code_3 sic_code_4 \n", - "99995 \n", - "99996 \n", - "99997 \n", - "99998 \n", - "99999 " - ] - }, - "execution_count": 236, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_ch.tail(5)" - ] - }, - { - "cell_type": "markdown", - "id": "c32230ca-e54c-428e-894c-4dd92d1cb690", - "metadata": {}, - "source": [ - "### Failed experiments in functionalisation" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "3022bf7f-f17e-4943-94b0-49e968767f18", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['table', 'table2']" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x = [\"table alias\", \"table2 alias2\"]\n", - "y = {}\n", - "for i in x:\n", - " xi = i.split()\n", - " y[xi[0]] = xi[1]\n", - " \n", - "list(y.keys())" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "83bca5d4-2f25-4e42-b43e-944e6ffcc640", - "metadata": {}, - "outputs": [], - "source": [ - "def dw_join(from_table: str = None, left_join: list = None, dedupe: bool = False):\n", - " # Process source\n", - " source_clean = from_table.replace(\"\\\"\", \"\").replace(\".\", \"_\").split()\n", - " \n", - " # Process target(s)\n", - " targets_clean = [\n", - " table.replace(\"\\\"\", \"\").replace(\".\", \"_\") \n", - " for table \n", - " in left_join\n", - " ]\n", - " targets_dict = {} \n", - " for target in targets_clean:\n", - " target_and_alias = target.split()\n", - " targets_dict[target_and_alias[0]] = target_and_alias[1]\n", - " \n", - " # Some checks here\n", - " if dedupe:\n", - " dedupe_sql = f\"\"\"\n", - " {from_table}\n", - " (\n", - " select\n", - " source,\n", - " source_id,\n", - " array_agg(target) as target, \n", - " array_agg(target_id) as target_id\n", - " from (\n", - " select distinct on (\n", - " lookup.source_id, \n", - " lookup.target,\n", - " lookup.target_cluster\n", - " )\n", - " *\n", - " from\n", - " lookup lookup\n", - " where\n", - " lookup.source = 'companieshouse_companies'\n", - " and lookup.target in (\n", - " {list(targets_dict.keys())}\n", - " )\n", - " order by\n", - " lookup.source_id, \n", - " lookup.target,\n", - " lookup.target_cluster,\n", - " lookup.match_probability desc\n", - " ) lookup\n", - " where\n", - " lookup.source = {source_clean[0]}\n", - " and lookup.target in (\n", - " {list(targets_dict.keys())}\n", - " )\n", - " group by\n", - " source,\n", - " source_id\n", - " ) lookup\n", - " right join {source_clean[0]} {source_clean[1]} on\n", - " lookup.source_id = {source_clean[1]}.unique_id \n", - " and lookup.source = {source_clean[0]}\n", - " \"\"\"\n", - "\n", - " for target in targets_dict.keys():\n", - " dedupe_sql += f\"\"\"\n", - " left join {target} {targets_dict[target]} on\n", - " array_has(lookup.target_id, {targets_dict[target]}.unique_id)\n", - " and array_has(lookup.target, {target})\n", - " \"\"\"\n", - " \n", - " sql = dedupe_sql\n", - " \n", - " return sql" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "48adc4ee-18cb-43e0-8d2e-8d5d47d1c55f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'\\n \"companieshouse\".\"companies\" ch\\n (\\n select\\n source,\\n source_id,\\n array_agg(target) as target, \\n array_agg(target_id) as target_id\\n from (\\n select distinct on (\\n lookup.source_id, \\n lookup.target,\\n lookup.target_cluster\\n )\\n *\\n from\\n lookup lookup\\n where\\n lookup.source = \\'companieshouse_companies\\'\\n and lookup.target in (\\n [\\'dit_export_wins__wins_dataset\\', \\'dit_data_hub__companies\\']\\n )\\n order by\\n lookup.source_id, \\n lookup.target,\\n lookup.target_cluster,\\n lookup.match_probability desc\\n ) lookup\\n where\\n lookup.source = companieshouse_companies\\n and lookup.target in (\\n [\\'dit_export_wins__wins_dataset\\', \\'dit_data_hub__companies\\']\\n )\\n group by\\n source,\\n source_id\\n ) lookup\\n right join companieshouse_companies ch on\\n lookup.source_id = ch.unique_id \\n and lookup.source = companieshouse_companies\\n \\n left join dit_export_wins__wins_dataset ew on\\n array_has(lookup.target_id, ew.unique_id)\\n and array_has(lookup.target, dit_export_wins__wins_dataset)\\n \\n left join dit_data_hub__companies dh on\\n array_has(lookup.target_id, dh.unique_id)\\n and array_has(lookup.target, dit_data_hub__companies)\\n '" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dw_join(\n", - " from_table = '\"companieshouse\".\"companies\" ch',\n", - " left_join = [\n", - " '\"dit\".\"export_wins__wins_dataset\" ew',\n", - " '\"dit\".\"data_hub__companies\" dh',\t\n", - " ],\n", - " dedupe = True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "3f0a9c28-5ba4-4438-9768-bc2891fb6461", - "metadata": {}, - "outputs": [ - { - "ename": "ParserException", - "evalue": "Parser Error: syntax error at or near \"select\"\nLINE 10: select\n ^", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mParserException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[40], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mduckdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124;43m select\u001b[39;49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;43m ch.unique_id,\u001b[39;49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;43m ch.company_name as ch_name,\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;43m dh.company_name as dh_name,\u001b[39;49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124;43m ew.company_name as ew_name\u001b[39;49m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124;43m from \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;250;43m \u001b[39;49m\u001b[43mdw_join\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;250;43m \u001b[39;49m\u001b[43mfrom_table\u001b[49m\u001b[38;5;250;43m \u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;250;43m \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompanieshouse\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompanies\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m ch\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;250;43m \u001b[39;49m\u001b[43mleft_join\u001b[49m\u001b[38;5;250;43m \u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;250;43m \u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;250;43m \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdit\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexport_wins__wins_dataset\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m ew\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;250;43m \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdit\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdata_hub__companies\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m dh\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;250;43m\t\u001b[39;49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;250;43m \u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;250;43m \u001b[39;49m\u001b[43mdedupe\u001b[49m\u001b[38;5;250;43m \u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;250;43m \u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;250;43m \u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;250;43m \u001b[39;49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mParserException\u001b[0m: Parser Error: syntax error at or near \"select\"\nLINE 10: select\n ^" - ] - } - ], - "source": [ - "duckdb.sql(f\"\"\"\n", - " select\n", - " ch.unique_id,\n", - " ch.company_name as ch_name,\n", - " dh.company_name as dh_name,\n", - " ew.company_name as ew_name\n", - " from {\n", - " dw_join(\n", - " from_table = '\"companieshouse\".\"companies\" ch',\n", - " left_join = [\n", - " '\"dit\".\"export_wins__wins_dataset\" ew',\n", - " '\"dit\".\"data_hub__companies\" dh',\t\n", - " ],\n", - " dedupe = True\n", - " )\n", - " }\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "cb2e4e31-cae0-4a54-800b-f9f100db3da2", - "metadata": {}, - "outputs": [], - "source": [ - "def dw_join(*args):\n", - " \n", - " lookup = {\n", - " 'df_ch': \"'companies_house'\",\n", - " 'df_dh': \"'datahub'\",\n", - " }\n", - " \n", - " sql = f\"\"\"\n", - " {args[0]}\n", - " left join test_lookup lookup on\n", - " lookup.source = {lookup[args[0].split()[0]]}\n", - " and lookup.target = {lookup[args[1].split()[0]]}\n", - " and lookup.source_id = {args[0].split()[1]}.unique_id\n", - " left join {args[1]} on\n", - " lookup.target_id = {args[1].split()[1]}.unique_id\n", - " where\n", - " lookup.source_cluster = lookup.target_cluster\n", - " order by\n", - " lookup.source_id, \n", - " lookup.target_id,\n", - " lookup.match_probability desc\n", - " \"\"\"\n", - " \n", - " return sql" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "31ecaa57-6625-4357-8783-04449c8048ca", - "metadata": {}, - "outputs": [], - "source": [ - "def dw_join_subquery(*args):\n", - " \n", - " lookup = {\n", - " 'df_ch': \"'companies_house'\",\n", - " 'df_dh': \"'datahub'\",\n", - " }\n", - " \n", - " sql = f\"\"\"\n", - " (\n", - " select distinct on (lookup.source_id, lookup.target_id)\n", - " {args[0].split()[1]}.*,\n", - " {args[1].split()[1]}.*\n", - " from\n", - " {args[0]}\n", - " left join test_lookup lookup on\n", - " lookup.source = {lookup[args[0].split()[0]]}\n", - " and lookup.target = {lookup[args[1].split()[0]]}\n", - " and lookup.source_id = {args[0].split()[1]}.unique_id\n", - " left join {args[1]} on\n", - " lookup.target_id = {args[1].split()[1]}.unique_id\n", - " where\n", - " lookup.source_cluster = lookup.target_cluster\n", - " order by\n", - " lookup.source_id, \n", - " lookup.target_id,\n", - " lookup.match_probability desc\n", - " )\n", - " \"\"\"\n", - " \n", - " return sql" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "f5d4f761-f693-40fd-a313-0eeba2f5729f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌───────────────────────┬──────────────────────────────────────────────────────────────────────────────────────────────┐\n", - "│ company_name │ secondary_names │\n", - "│ varchar │ varchar[] │\n", - "├───────────────────────┼──────────────────────────────────────────────────────────────────────────────────────────────┤\n", - "│ NORFOLK CHAMBERS OF… │ [NORFOLK CHAMBER OF COMMERCE AND INDUSTRY] │\n", - "│ TATE & LYLE PUBLIC … │ [] │\n", - "│ COLEHERNE LIMITED │ [] │\n", - "│ J.T.DOVE,LIMITED │ [] │\n", - "│ SMITHS GROUP PLC │ [] │\n", - "│ SIMPSONS MALT LIMITED │ [] │\n", - "│ REFRESCO BEVERAGES … │ [REFRESCO GERBER UK LIMITED, GERBER JUICE COMPANY LIMITED, GERBER FOODS SOFT DRINKS LIMITED] │\n", - "│ JAMES CLARKE AND CO… │ [] │\n", - "│ NUERA PRODUCTS LIMI… │ [] │\n", - "│ THE GREETING CARD A… │ [] │\n", - "│ · │ · │\n", - "│ · │ · │\n", - "│ · │ · │\n", - "│ LIFE TRAINING SYSTE… │ [] │\n", - "│ OCEAN INSTALLER LIM… │ [HAVFRAM LIMITED, OCEAN INSTALLER LIMITED, PACIFIC SHELF 1687 LIMITED] │\n", - "│ AUTOMATION XL LIMITED │ [] │\n", - "│ BERINGAR LTD │ [] │\n", - "│ HARRIET B LTD │ [FENNEL MEDIA LIMITED] │\n", - "│ SPACE INTELLIGENCE … │ [] │\n", - "│ SLOW ADVENTURE LTD │ [] │\n", - "│ STORY LEARNING LIMI… │ [] │\n", - "│ K-VELL LTD │ [] │\n", - "│ BR CHAPEL LIMITED │ [] │\n", - "├───────────────────────┴──────────────────────────────────────────────────────────────────────────────────────────────┤\n", - "│ 848 rows (20 shown) 2 columns │\n", - "└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(f\"\"\"\n", - " select \n", - " company_name,\n", - " secondary_names\n", - " from {dw_join_subquery('df_ch ch', 'df_dh dh')}\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "a70b48e9-3afb-46c2-846c-6630945681df", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────────────┬──────────────────────┬─────────────────────────────────────┬──────────────────────────────────┐\n", - "│ company_name │ company_name │ secondary_names │ secondary_names │\n", - "│ varchar │ varchar │ varchar[] │ varchar[] │\n", - "├──────────────────────┼──────────────────────┼─────────────────────────────────────┼──────────────────────────────────┤\n", - "│ PROSADDLES LIMITED │ PROSADDLES LIMITED │ [] │ [] │\n", - "│ MOCA FASHION LIMITED │ MOCA FASHION LIMITED │ [] │ [] │\n", - "│ OCEAN INSTALLER LI… │ OCEAN INSTALLER LI… │ [HAVFRAM LIMITED, OCEAN INSTALLER… │ ['OCEAN INSTALLER'] │\n", - "│ NEEDL ANALYTICS LI… │ NEEDL ANALYTICS LI… │ [NEEDLE ANALYTICS LIMITED, NEEDL … │ [] │\n", - "│ PCT LONDON LIMITED │ PCT LONDON LIMITED │ [OMNIO LONDON LIMITED] │ [] │\n", - "│ SPACE INTELLIGENCE… │ SPACE INTELLIGENCE… │ [] │ [] │\n", - "│ VISIONALITY MEDIA … │ VISIONALITY MEDIA … │ [] │ [] │\n", - "│ WELLS PLASTICS LIM… │ WELLS PLASTICS LIM… │ [] │ [] │\n", - "│ SNOOPBY UK LIMITED │ SNOOPBY UK LIMITED │ [] │ [] │\n", - "│ BOSTON PUTFORD OFF… │ BOSTON PUTFORD OFF… │ [] │ [] │\n", - "│ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │\n", - "│ AB WORLD FOODS LIM… │ AB WORLD FOODS LIM… │ [PATAK'S FOODS LIMITED] │ ['AB WORLD FOODS'] │\n", - "│ BULLETPROOF DESIGN… │ BULLETPROOF DESIGN… │ [] │ ['SOUTHPAW', 'BULLETPROOF INC.'] │\n", - "│ STORY LEARNING LIM… │ STORY LEARNING LIM… │ [] │ [] │\n", - "│ BLACKTHORNE INTERN… │ BLACKTHORNE INTERN… │ [] │ [] │\n", - "│ SPEEDITEAR RULE PR… │ SPEEDITEAR RULE PR… │ [] │ [] │\n", - "│ BOXFAB LIMITED │ BOXFAB LIMITED │ [] │ [] │\n", - "│ KELKAY LIMITED │ KELKAY LIMITED │ [] │ ['Kelkay', 'AnchorFast'] │\n", - "│ SWALLOW PLACE ASSO… │ SWALLOW PLACE ASSO… │ [RBAKJA LLP, CAIRNEAGLE ASSOCIATE… │ [] │\n", - "│ BLUE BOX-LONDON LI… │ BLUE BOX-LONDON LI… │ [] │ [] │\n", - "│ DURESTA UPHOLSTERY… │ DURESTA UPHOLSTERY… │ [] │ ['Duresta'] │\n", - "├──────────────────────┴──────────────────────┴─────────────────────────────────────┴──────────────────────────────────┤\n", - "│ 848 rows (20 shown) 4 columns │\n", - "└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(f\"\"\"\n", - " select distinct on (lookup.source_id, lookup.target_id)\n", - " ch.company_name,\n", - " dh.company_name,\n", - " ch.secondary_names,\n", - " dh.secondary_names\n", - " from {dw_join('df_dh dh', 'df_ch ch')}\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "id": "651edb39-ee8e-4613-bd84-d6270302ae28", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌─────────┬──────────────────────┬──────────────────────┬─────────┬──────────────────────┬─────────────────────────────┐\n", - "│ source │ company_name │ secondary_names │ target │ company_name │ secondary_names │\n", - "│ varchar │ varchar │ varchar[] │ varchar │ varchar │ varchar[] │\n", - "├─────────┼──────────────────────┼──────────────────────┼─────────┼──────────────────────┼─────────────────────────────┤\n", - "│ ch │ RICHARD GRIFFIN (1… │ [] │ dh │ RICHARD GRIFFIN (1… │ ['Tarquin'] │\n", - "│ ch │ RIGHTON & BLACKBUR… │ [RIGHTON LIMITED] │ dh │ RIGHTON & BLACKBUR… │ ['RIGHTON BLACKBURNS', 'R… │\n", - "│ ch │ F.HINDS LIMITED │ [] │ dh │ F.HINDS LIMITED │ ['Chapelle'] │\n", - "│ ch │ H. CLARKSON & COMP… │ [] │ dh │ H. CLARKSON & COMP… │ [] │\n", - "│ ch │ JAMES LOCK AND CO.… │ [] │ dh │ JAMES LOCK AND CO.… │ [] │\n", - "│ ch │ JOHN HUNT (BOLTON)… │ [] │ dh │ JOHN HUNT (BOLTON)… │ [] │\n", - "│ ch │ SUCAFINA UK LTD │ [COMPLETE COFFEE L… │ dh │ SUCAFINA UK LTD │ ['CCL', 'Ridge & Breminer… │\n", - "│ ch │ PARALLOY LIMITED │ [] │ dh │ PARALLOY LIMITED │ ['PARALLOY'] │\n", - "│ ch │ S.BRANNAN & SONS,L… │ [] │ dh │ S.BRANNAN & SONS,L… │ ['BRANNAN'] │\n", - "│ ch │ D.A.SOLEY LIMITED │ [] │ dh │ D.A.SOLEY LIMITED │ [] │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ ch │ WILLO TECHNOLOGIES… │ [WEEVE TECHNOLOGIE… │ dh │ WILLO TECHNOLOGIES… │ [] │\n", - "│ ch │ EARTHWAVE LTD │ [] │ dh │ EARTHWAVE LTD │ [] │\n", - "│ ch │ N2 APPLIED LTD │ [] │ dh │ N2 APPLIED LTD │ [] │\n", - "│ ch │ KONGLOMERATE GAMES… │ [] │ dh │ KONGLOMERATE GAMES… │ [] │\n", - "│ ch │ TOLL HOUSE SPIRITS… │ [] │ dh │ TOLL HOUSE SPIRITS… │ [] │\n", - "│ ch │ AGILIS HEALTH LIMI… │ [] │ dh │ AGILIS HEALTH LIMI… │ [] │\n", - "│ ch │ CALEDONIA EDUCATIO… │ [] │ dh │ CALEDONIA EDUCATIO… │ [] │\n", - "│ ch │ APODIUM INTERNATIO… │ [] │ dh │ APODIUM INTERNATIO… │ [] │\n", - "│ ch │ SCOTTISH SPACE GRO… │ [] │ dh │ SCOTTISH SPACE GRO… │ [] │\n", - "│ ch │ EMPORIUM DIGITAL LTD │ [] │ dh │ EMPORIUM DIGITAL LTD │ [] │\n", - "├─────────┴──────────────────────┴──────────────────────┴─────────┴──────────────────────┴─────────────────────────────┤\n", - "│ 684 rows (20 shown) 6 columns │\n", - "└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 95, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select distinct on (lookup.source_id, lookup.target_id)\n", - " 'ch' as source,\n", - " ch.company_name,\n", - " ch.secondary_names,\n", - " 'dh' as target,\n", - " dh.company_name,\n", - " dh.secondary_names\n", - " from\n", - " df_ch ch\n", - " left join test_lookup lookup on\n", - " lookup.source = 'companies_house'\n", - " and lookup.target = 'datahub'\n", - " and lookup.source_id = ch.unique_id\n", - " left join df_dh dh on\n", - " lookup.target_id = dh.unique_id\n", - " where\n", - " lookup.source_cluster = lookup.target_cluster\n", - " order by\n", - " lookup.source_id, \n", - " lookup.target_id,\n", - " lookup.match_probability desc\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "398274ff-639c-4958-9668-c0daa908dc6c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌─────────┬────────────────────────┬────────────────────┬─────────┬──────────────────────────────────┬─────────────────┐\n", - "│ source │ company_name │ secondary_names │ target │ company_name │ secondary_names │\n", - "│ varchar │ varchar │ varchar[] │ varchar │ varchar │ varchar │\n", - "├─────────┼────────────────────────┼────────────────────┼─────────┼──────────────────────────────────┼─────────────────┤\n", - "│ dh │ THE BRIARS GROUP LIM… │ [] │ ew │ The Briars Group Ltd │ │\n", - "│ dh │ AVEVA GROUP LIMITED │ ['AVEVA'] │ ew │ AVEVA │ │\n", - "│ dh │ CLUCAS METHOD OF ENT… │ ['Clucas M O E'] │ ew │ Clucas Method Of Entry Limited │ │\n", - "│ dh │ MENOPOISED LTD │ [] │ ew │ Menopoised Ltd │ │\n", - "│ dh │ LOVESEITAN LTD │ [] │ ew │ Loveseitan Ltd │ │\n", - "│ dh │ CHERIDA LIMITED │ [] │ ew │ CHERIDA LIMITED │ │\n", - "│ dh │ INTASITE LTD │ [] │ ew │ Intasite Ltd │ │\n", - "│ dh │ UKDE LIMITED │ [] │ ew │ UKDE Limited │ │\n", - "│ dh │ TODDLE BORN WILD LIM… │ [] │ ew │ TODDLE BORN WILD LIMITED │ │\n", - "│ dh │ EXSEL DESIGN AND INT… │ [] │ ew │ Exsel Design and Integration Ltd │ │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ · │ · │ · │ · │ · │ · │\n", - "│ dh │ ASCENDAL GROUP LIMITED │ ['ASCENDAL GROUP'] │ ew │ ASCENDAL GROUP │ │\n", - "│ dh │ GFM FILMS LLP │ [] │ ew │ GFM Films │ │\n", - "│ dh │ X-RAY MINERAL SERVIC… │ [] │ ew │ X-ray Mineral Services Ltd │ │\n", - "│ dh │ ALLIOT TECHNOLOGIES … │ [] │ ew │ ALLIOT TECHNOLOGIES LIMITED │ │\n", - "│ dh │ SALOTO LTD. │ [] │ ew │ Saloto Ltd │ │\n", - "│ dh │ SCITEK CONSULTANTS L… │ [] │ ew │ SCITEK CONSULTANTS LIMITED │ │\n", - "│ dh │ CUSTOM VET PRODUCTS … │ [] │ ew │ CUSTOM VET PRODUCTS LIMITED │ │\n", - "│ dh │ TROY ASSET MANAGEMEN… │ [] │ ew │ Troy Asset Management │ │\n", - "│ dh │ SIMPLY DOUGHNUTS LTD │ [] │ ew │ SIMPLY DOUGHNUTS LTD │ │\n", - "│ dh │ INSIGHTFUL BRANDS LI… │ ['HIP POP'] │ ew │ Insightful Brands Limited │ │\n", - "├─────────┴────────────────────────┴────────────────────┴─────────┴──────────────────────────────────┴─────────────────┤\n", - "│ 1206 rows (20 shown) 6 columns │\n", - "└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select distinct on (lookup.source_id, lookup.target_id)\n", - " 'dh' as source,\n", - " dh.company_name,\n", - " dh.secondary_names,\n", - " 'ew' as target,\n", - " ew.company_name,\n", - " ew.secondary_names\n", - " from\n", - " df_dh dh\n", - " left join test_lookup lookup on\n", - " lookup.source = 'datahub'\n", - " and lookup.target = 'export_wins'\n", - " and lookup.source_id = dh.unique_id\n", - " left join df_ew ew on\n", - " lookup.target_id = ew.unique_id\n", - " where\n", - " lookup.source_cluster = lookup.target_cluster\n", - " order by\n", - " lookup.source_id, \n", - " lookup.target_id,\n", - " lookup.match_probability desc\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 164, - "id": "d496a5fd-d70b-4c34-946b-9cfc60060c6a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (4, 14)
cluster_idsource_datasetunique_idcomp_num_cleanname_unusual_tokenssecondary_name_unusual_tokensnames_tokens_stopwordspostcodepostcode_altname_unusual_tokens_first5name_unusual_tokens_last5postcode_areatf_comp_num_cleantf_name_unusual_tokens
strstrstrstrstrstrstrstrf64strstrstrf64f64
"companies_hous…"datahub""6e56cb7c-d286-…"1846493""aecom""aecom arabia""limited"""null"aecom""aecom"null0.0000260.000055
"companies_hous…"companies_hous…"01846493""1846493""aecom""faber fabermau…"limited""E1 8FA"null"aecom""aecom""E"0.0000260.000055
"companies_hous…"datahub""e3b2f38a-cb5c-…"1846493""aecom""""limited"""null"aecom""aecom"null0.0000260.000055
"companies_hous…"datahub""ae5b6e81-0d17-…"1846493""aecom""""limited"""null"aecom""aecom"null0.0000260.000055
" - ], - "text/plain": [ - "shape: (4, 14)\n", - "┌──────────┬────────────┬─────────┬────────────┬───┬────────────┬────────────┬────────────┬────────────┐\n", - "│ cluster_ ┆ source_dat ┆ unique_ ┆ comp_num_c ┆ … ┆ name_unusu ┆ postcode_a ┆ tf_comp_nu ┆ tf_name_un │\n", - "│ id ┆ aset ┆ id ┆ lean ┆ ┆ al_tokens_ ┆ rea ┆ m_clean ┆ usual_toke │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ last5 ┆ --- ┆ --- ┆ ns │\n", - "│ str ┆ str ┆ str ┆ str ┆ ┆ --- ┆ str ┆ f64 ┆ --- │\n", - "│ ┆ ┆ ┆ ┆ ┆ str ┆ ┆ ┆ f64 │\n", - "╞══════════╪════════════╪═════════╪════════════╪═══╪════════════╪════════════╪════════════╪════════════╡\n", - "│ companie ┆ datahub ┆ 6e56cb7 ┆ 1846493 ┆ … ┆ aecom ┆ null ┆ 0.000026 ┆ 0.000055 │\n", - "│ s_house- ┆ ┆ c-d286- ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ __-01846 ┆ ┆ 403b-9a ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ 493 ┆ ┆ 5d-b338 ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ ┆ ┆ 20e1… ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ companie ┆ companies_ ┆ 0184649 ┆ 1846493 ┆ … ┆ aecom ┆ E ┆ 0.000026 ┆ 0.000055 │\n", - "│ s_house- ┆ house ┆ 3 ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ __-01846 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ 493 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ companie ┆ datahub ┆ e3b2f38 ┆ 1846493 ┆ … ┆ aecom ┆ null ┆ 0.000026 ┆ 0.000055 │\n", - "│ s_house- ┆ ┆ a-cb5c- ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ __-01846 ┆ ┆ 477b-8d ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ 493 ┆ ┆ 4e-d1ba ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ ┆ ┆ 9d2f… ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ companie ┆ datahub ┆ ae5b6e8 ┆ 1846493 ┆ … ┆ aecom ┆ null ┆ 0.000026 ┆ 0.000055 │\n", - "│ s_house- ┆ ┆ 1-0d17- ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ __-01846 ┆ ┆ 4211-9a ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ 493 ┆ ┆ 2e-c556 ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "│ ┆ ┆ 6451… ┆ ┆ ┆ ┆ ┆ ┆ │\n", - "└──────────┴────────────┴─────────┴────────────┴───┴────────────┴────────────┴────────────┴────────────┘" - ] - }, - "execution_count": 164, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(\n", - " pl.from_pandas(df_clusters)\n", - " .filter(pl.col('cluster_id') == 'companies_house-__-01846493')\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "id": "cfd71714-2edc-445d-bdc9-32e52c5a7e69", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────────────┬──────────────────────┬──────────────────────────────────────────────────────┬─────────────────┐\n", - "│ company_name │ company_name │ secondary_names │ secondary_names │\n", - "│ varchar │ varchar │ varchar[] │ varchar[] │\n", - "├──────────────────────┼──────────────────────┼──────────────────────────────────────────────────────┼─────────────────┤\n", - "│ DEPT DESIGN & TECH… │ DEPT DESIGN & TECH… │ [DEBT DESIGN & TECHNOLOGY LTD, BUILDING BLOCKS (UK… │ [] │\n", - "│ GREEN PIONEER LIMI… │ GREEN PIONEER LIMI… │ [] │ [] │\n", - "│ OPAL PARTNERS LIMI… │ OPAL PARTNERS LIMI… │ [] │ [] │\n", - "│ HARRY HALL INTERNA… │ HARRY HALL INTERNA… │ [MATCHMAKERS INTERNATIONAL LIMITED] │ ['Harry Hall'] │\n", - "│ DH SALES LIMITED │ DH SALES LIMITED │ [] │ [] │\n", - "└──────────────────────┴──────────────────────┴──────────────────────────────────────────────────────┴─────────────────┘" - ] - }, - "execution_count": 107, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " ch.company_name,\n", - " dh.company_name,\n", - " ch.secondary_names,\n", - " dh.secondary_names\n", - " from\n", - " df_ch ch\n", - " left join df_dh dh on\n", - " ch.company_number = dh.company_number\n", - " limit 5\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c26d3330-2a9c-4652-92f5-e97d3eb8ca1e", - "metadata": {}, - "outputs": [], - "source": [ - "duckdb.sql(\"\"\"\n", - " select\n", - " ch.company_name,\n", - " dh.company_name,\n", - " ch.secondary_names,\n", - " dh.secondary_names\n", - " from\n", - " df_ch ch\n", - " left join (\n", - " select \n", - " cluster_id,\n", - " unique_id\n", - " from\n", - " df_clusters\n", - " where\n", - " source_dataset in ['datahub', 'companies_house']\n", - " ) clu on\n", - " left join df_dh dh on\n", - " ch.company_number = dh.company_number\n", - " limit 5\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "id": "ca5b62ee-36b0-4f38-9bdc-427127de42dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
company_numbercompany_namesecondary_namescompany_statusaccount_categoryaddress_line_1address_line_2post_towncountycountrypostcodesic_code_1sic_code_2sic_code_3sic_code_4
005750887YEM PROPERTY INVESTMENTS LIMITED[]ActiveTOTAL EXEMPTION FULLASM HOUSE103A KEYMER ROADHASSOCKSWEST SUSSEXUNITED KINGDOMBN6 8QL68209 - Other letting and operating of own or ...
109123245FAIRMONT PROPERTY SERVICES LIMITED[]ActiveMICRO ENTITYQUEENS COURT9-17 EASTERN ROADROMFORDESSEXENGLANDRM1 3NH43341 - Painting43390 - Other building completion and finishing
211383369CHRIST KINGDOM WELFARE MINISTRIES LTD[]ActiveMICRO ENTITY29A LONDON ROADBARKINGUNITED KINGDOMIG11 8AF94910 - Activities of religious organizations
309939647DURONIC FOUNDATION LIMITED[]ActiveMICRO ENTITY1A SPILSBY ROADROMFORDESSEXUNITED KINGDOMRM8 8SB99000 - Activities of extraterritorial organiz...
407441481DENTAL APPLIANCE MANUFACTURING LTD[]ActiveDORMANT300 ST. MARYS ROADGARSTONLIVERPOOLL19 0NQ32500 - Manufacture of medical and dental inst...
................................................
9999514710751AUTOGRAPH AGENCY LTD[]ActiveNO ACCOUNTS FILED71-75 SHELTON STREETCOVENT GARDENLONDONUNITED KINGDOMWC2H 9JQ73110 - Advertising agencies
9999610965149STEFAN HORNIG LIMITED[]ActiveMICRO ENTITY29 MORRISON AVENUELONDONENGLANDN17 6TU59112 - Video production activities
9999714043983LAFAMILLIA LIMITED[]ActiveNO ACCOUNTS FILED39 ENDERS COURTMEDBOURNEMILTON KEYNESENGLANDMK5 6GD56103 - Take-away food shops and mobile food s...78200 - Temporary employment agency activities
9999808975663PB COMMUNICATIONS CONSULTANTS LTD[]ActiveTOTAL EXEMPTION FULL30/32 GILDREDGE ROADEASTBOURNEEAST SUSSEXBN21 4SH62020 - Information technology consultancy act...
9999909448448J&M CIVILS LIMITED[]ActiveTOTAL EXEMPTION FULL1 HARDY CLOSE, NELSON COURT BUSINESS CENTREASHTON-ON-RIBBLEPRESTONENGLANDPR2 2XP42210 - Construction of utility projects for f...
\n", - "

100000 rows × 15 columns

\n", - "
" - ], - "text/plain": [ - " company_number company_name secondary_names \\\n", - "0 05750887 YEM PROPERTY INVESTMENTS LIMITED [] \n", - "1 09123245 FAIRMONT PROPERTY SERVICES LIMITED [] \n", - "2 11383369 CHRIST KINGDOM WELFARE MINISTRIES LTD [] \n", - "3 09939647 DURONIC FOUNDATION LIMITED [] \n", - "4 07441481 DENTAL APPLIANCE MANUFACTURING LTD [] \n", - "... ... ... ... \n", - "99995 14710751 AUTOGRAPH AGENCY LTD [] \n", - "99996 10965149 STEFAN HORNIG LIMITED [] \n", - "99997 14043983 LAFAMILLIA LIMITED [] \n", - "99998 08975663 PB COMMUNICATIONS CONSULTANTS LTD [] \n", - "99999 09448448 J&M CIVILS LIMITED [] \n", - "\n", - " company_status account_category \\\n", - "0 Active TOTAL EXEMPTION FULL \n", - "1 Active MICRO ENTITY \n", - "2 Active MICRO ENTITY \n", - "3 Active MICRO ENTITY \n", - "4 Active DORMANT \n", - "... ... ... \n", - "99995 Active NO ACCOUNTS FILED \n", - "99996 Active MICRO ENTITY \n", - "99997 Active NO ACCOUNTS FILED \n", - "99998 Active TOTAL EXEMPTION FULL \n", - "99999 Active TOTAL EXEMPTION FULL \n", - "\n", - " address_line_1 address_line_2 \\\n", - "0 ASM HOUSE 103A KEYMER ROAD \n", - "1 QUEENS COURT 9-17 EASTERN ROAD \n", - "2 29A LONDON ROAD \n", - "3 1A SPILSBY ROAD \n", - "4 300 ST. MARYS ROAD GARSTON \n", - "... ... ... \n", - "99995 71-75 SHELTON STREET COVENT GARDEN \n", - "99996 29 MORRISON AVENUE \n", - "99997 39 ENDERS COURT MEDBOURNE \n", - "99998 30/32 GILDREDGE ROAD \n", - "99999 1 HARDY CLOSE, NELSON COURT BUSINESS CENTRE ASHTON-ON-RIBBLE \n", - "\n", - " post_town county country postcode \\\n", - "0 HASSOCKS WEST SUSSEX UNITED KINGDOM BN6 8QL \n", - "1 ROMFORD ESSEX ENGLAND RM1 3NH \n", - "2 BARKING UNITED KINGDOM IG11 8AF \n", - "3 ROMFORD ESSEX UNITED KINGDOM RM8 8SB \n", - "4 LIVERPOOL L19 0NQ \n", - "... ... ... ... ... \n", - "99995 LONDON UNITED KINGDOM WC2H 9JQ \n", - "99996 LONDON ENGLAND N17 6TU \n", - "99997 MILTON KEYNES ENGLAND MK5 6GD \n", - "99998 EASTBOURNE EAST SUSSEX BN21 4SH \n", - "99999 PRESTON ENGLAND PR2 2XP \n", - "\n", - " sic_code_1 \\\n", - "0 68209 - Other letting and operating of own or ... \n", - "1 43341 - Painting \n", - "2 94910 - Activities of religious organizations \n", - "3 99000 - Activities of extraterritorial organiz... \n", - "4 32500 - Manufacture of medical and dental inst... \n", - "... ... \n", - "99995 73110 - Advertising agencies \n", - "99996 59112 - Video production activities \n", - "99997 56103 - Take-away food shops and mobile food s... \n", - "99998 62020 - Information technology consultancy act... \n", - "99999 42210 - Construction of utility projects for f... \n", - "\n", - " sic_code_2 sic_code_3 sic_code_4 \n", - "0 \n", - "1 43390 - Other building completion and finishing \n", - "2 \n", - "3 \n", - "4 \n", - "... ... ... ... \n", - "99995 \n", - "99996 \n", - "99997 78200 - Temporary employment agency activities \n", - "99998 \n", - "99999 \n", - "\n", - "[100000 rows x 15 columns]" - ] - }, - "execution_count": 102, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_ch" - ] - }, - { - "cell_type": "markdown", - "id": "2915287f-60ed-4a9e-99ea-a0a9432bbfd4", - "metadata": {}, - "source": [ - "## Refining blocking rules" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "67ceb58c-20ed-415b-a598-c439de8bec6c", - "metadata": {}, - "outputs": [], - "source": [ - "blocking_rules = {\n", - " 'blocking_rule_1': \"\"\"\n", - " ((l.comp_num_clean = r.comp_num_clean)) \n", - " and (\n", - " l.comp_num_clean <> '' \n", - " and r.comp_num_clean <> ''\n", - " )\n", - " \"\"\",\n", - " 'blocking_rule_2': \"\"\"\n", - " (l.name_unusual_tokens = r.name_unusual_tokens) \n", - " and (\n", - " l.name_unusual_tokens <> '' \n", - " and r.name_unusual_tokens <> ''\n", - " )\n", - " \"\"\",\n", - " # 'blocking_rule_3': \"\"\"\n", - " # (l.name_unusual_tokens_first5 = r.name_unusual_tokens_first5) \n", - " # and (\n", - " # length(l.name_unusual_tokens_first5) = 5 \n", - " # and length(r.name_unusual_tokens_first5) = 5\n", - " # )\n", - " # \"\"\",\n", - " # 'blocking_rule_4': \"\"\"\n", - " # (l.name_unusual_tokens_last5 = r.name_unusual_tokens_last5) \n", - " # and (\n", - " # length(l.name_unusual_tokens_last5) = 5 \n", - " # and length(r.name_unusual_tokens_last5) = 5\n", - " # )\n", - " # \"\"\",\n", - " 'blocking_rule_5': \"\"\"\n", - " (l.secondary_name_unusual_tokens = r.secondary_name_unusual_tokens) \n", - " and (\n", - " l.secondary_name_unusual_tokens <> '' \n", - " and r.secondary_name_unusual_tokens <> ''\n", - " )\n", - " \"\"\",\n", - " 'blocking_rule_6': \"\"\"\n", - " (l.secondary_name_unusual_tokens = r.name_unusual_tokens) \n", - " and (\n", - " l.secondary_name_unusual_tokens <> '' \n", - " and r.name_unusual_tokens <> ''\n", - " )\n", - " \"\"\",\n", - " 'blocking_rule_7': \"\"\"\n", - " (r.secondary_name_unusual_tokens = l.name_unusual_tokens) \n", - " and (\n", - " r.secondary_name_unusual_tokens <> '' \n", - " and l.name_unusual_tokens <> ''\n", - " )\n", - " \"\"\",\n", - " # 'blocking_rule_8': \"\"\"\n", - " # (l.name_sig_first5 = r.name_sig_first5) \n", - " # and (\n", - " # length(l.name_sig_first5) = 5 \n", - " # and length(r.name_sig_first5) = 5\n", - " # )\n", - " # \"\"\",\n", - " # 'blocking_rule_9': \"\"\"\n", - " # (l.name_sig_last5 = r.name_sig_last5) \n", - " # and (\n", - " # length(l.name_sig_last5) = 5 \n", - " # and length(r.name_sig_last5) = 5\n", - " # )\n", - " # \"\"\",\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "4fe28341-7e2c-451e-84e1-33e03fa70e36", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2023-06-21 11:13:11.551834: Evaluating blocking_rule_1\n", - "2023-06-21 11:13:11.775857: Finished evaluating blocking_rule_1\n", - "2023-06-21 11:13:11.775945: Evaluating blocking_rule_2\n", - "2023-06-21 11:13:12.080950: Finished evaluating blocking_rule_2\n", - "2023-06-21 11:13:12.081043: Evaluating blocking_rule_3\n", - "2023-06-21 11:13:15.578577: Finished evaluating blocking_rule_3\n", - "2023-06-21 11:13:15.578697: Evaluating blocking_rule_4\n", - "2023-06-21 11:13:22.542116: Finished evaluating blocking_rule_4\n", - "2023-06-21 11:13:22.542208: Evaluating blocking_rule_5\n", - "2023-06-21 11:13:22.675390: Finished evaluating blocking_rule_5\n", - "2023-06-21 11:13:22.675431: Evaluating blocking_rule_6\n", - "2023-06-21 11:13:22.852258: Finished evaluating blocking_rule_6\n", - "2023-06-21 11:13:22.852348: Evaluating blocking_rule_7\n", - "2023-06-21 11:13:23.007059: Finished evaluating blocking_rule_7\n", - "2023-06-21 11:13:23.007099: Evaluating blocking_rule_8\n", - "2023-06-21 11:13:23.517889: Finished evaluating blocking_rule_8\n", - "2023-06-21 11:13:23.517984: Evaluating blocking_rule_9\n", - "2023-06-21 11:13:23.975362: Finished evaluating blocking_rule_9\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
rulecount
0blocking_rule_111539
1blocking_rule_2105347
2blocking_rule_331721861
3blocking_rule_462421318
4blocking_rule_535
5blocking_rule_612530
6blocking_rule_7277
7blocking_rule_82124023
8blocking_rule_91529366
\n", - "
" - ], - "text/plain": [ - " rule count\n", - "0 blocking_rule_1 11539\n", - "1 blocking_rule_2 105347\n", - "2 blocking_rule_3 31721861\n", - "3 blocking_rule_4 62421318\n", - "4 blocking_rule_5 35\n", - "5 blocking_rule_6 12530\n", - "6 blocking_rule_7 277\n", - "7 blocking_rule_8 2124023\n", - "8 blocking_rule_9 1529366" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rule_counts = {\n", - " 'rule': [],\n", - " 'count': []\n", - "}\n", - "\n", - "for rule in blocking_rules.keys():\n", - " print(f'{datetime.datetime.now()}: Evaluating {rule}')\n", - " \n", - " count = linker.count_num_comparisons_from_blocking_rule(blocking_rules[rule])\n", - " \n", - " print(f'{datetime.datetime.now()}: Finished evaluating {rule}')\n", - " \n", - " rule_counts['rule'].append(rule)\n", - " rule_counts['count'].append(count)\n", - "\n", - "pd.DataFrame.from_dict(rule_counts)" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "id": "ac92e9b8-e8a6-4be8-a533-353a06f8ae37", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.vegalite.v4+json": { - "$schema": "https://vega.github.io/schema/vega-lite/v5.json", - "data": { - "values": [ - { - "cartesian": 46148700000, - "cumulative_rows": 11539, - "reduction_ratio": "The rolling reduction ratio with your given blocking rule(s) is 1.0. This represents the reduction in the total number of comparisons due to your rule(s).", - "row_count": 11539, - "rule": "\n ((l.comp_num_clean = r.comp_num_clean)) \n and (\n l.comp_num_clean <> '' \n and r.comp_num_clean <> ''\n )\n ", - "start": 0 - }, - { - "cartesian": 46148700000, - "cumulative_rows": 107204, - "reduction_ratio": "The rolling reduction ratio with your given blocking rule(s) is 0.999998. This represents the reduction in the total number of comparisons due to your rule(s).", - "row_count": 95665, - "rule": "\n (l.name_unusual_tokens = r.name_unusual_tokens) \n and (\n l.name_unusual_tokens <> '' \n and r.name_unusual_tokens <> ''\n )\n ", - "start": 11539 - }, - { - "cartesian": 46148700000, - "cumulative_rows": 107238, - "reduction_ratio": "The rolling reduction ratio with your given blocking rule(s) is 0.999998. This represents the reduction in the total number of comparisons due to your rule(s).", - "row_count": 34, - "rule": "\n (l.secondary_name_unusual_tokens = r.secondary_name_unusual_tokens) \n and (\n l.secondary_name_unusual_tokens <> '' \n and r.secondary_name_unusual_tokens <> ''\n )\n ", - "start": 107204 - }, - { - "cartesian": 46148700000, - "cumulative_rows": 113882, - "reduction_ratio": "The rolling reduction ratio with your given blocking rule(s) is 0.999998. This represents the reduction in the total number of comparisons due to your rule(s).", - "row_count": 6644, - "rule": "\n (l.secondary_name_unusual_tokens = r.name_unusual_tokens) \n and (\n l.secondary_name_unusual_tokens <> '' \n and r.name_unusual_tokens <> ''\n )\n ", - "start": 107238 - }, - { - "cartesian": 46148700000, - "cumulative_rows": 114078, - "reduction_ratio": "The rolling reduction ratio with your given blocking rule(s) is 0.999998. This represents the reduction in the total number of comparisons due to your rule(s).", - "row_count": 196, - "rule": "\n (r.secondary_name_unusual_tokens = l.name_unusual_tokens) \n and (\n r.secondary_name_unusual_tokens <> '' \n and l.name_unusual_tokens <> ''\n )\n ", - "start": 113882 - } - ] - }, - "encoding": { - "color": { - "field": "rule", - "legend": null, - "scale": { - "scheme": "category20c" - } - }, - "order": { - "field": "cumulative_rows" - }, - "tooltip": [ - { - "field": "rule", - "title": "SQL Condition", - "type": "nominal" - }, - { - "field": "row_count", - "format": ",", - "title": "Comparisons Generated", - "type": "quantitative" - }, - { - "field": "cumulative_rows", - "format": ",", - "title": "Cumulative Comparisons", - "type": "quantitative" - }, - { - "field": "cartesian", - "format": ",", - "title": "Cartesian Product of Input Data", - "type": "quantitative" - }, - { - "field": "reduction_ratio", - "title": "Reduction Ratio (cumulative rows/cartesian product)", - "type": "nominal" - } - ], - "x": { - "field": "start", - "title": "Comparisons Generated by Rule(s)", - "type": "quantitative" - }, - "x2": { - "field": "cumulative_rows" - }, - "y": { - "field": "rule", - "sort": [ - "-x2" - ], - "title": "SQL Blocking Rule" - } - }, - "height": { - "step": 20 - }, - "mark": "bar", - "title": { - "subtitle": "(Counts exclude comparisons already generated by previous rules)", - "text": "Count of Additional Comparisons Generated by Each Blocking Rule" - }, - "width": 450 - }, - "image/png": "iVBORw0KGgoAAAANSUhEUgAAApcAAACtCAYAAADlElRzAAAAAXNSR0IArs4c6QAAIABJREFUeF7s3QWUJcXB/v8ihATXEDy4BQ/ubsHlxV2COwGCu7u7uxNCcHf4AcHdQ4DgEpyX//lU/rVvc7fvnbqzu7N3ZqrO2bO7M9XdVU9VV337KRtm1lln/SmUUBQoChQFigJFgaJAUaAoUBQYDAoMU+ByMKhYblEUKAoUBYoCRYGiQFGgKBAVKHBZKkJRoChQFCgKFAWKAkWBosBgU6DA5WCTstyoKFAUKAoUBYoCRYGiQFGgwGWpA0WBokBRoChQFCgKFAWKAoNNgQKXg03KcqOiQPcV+O1vfxt+97vfhaeeeip899133b/RULpy5JFHDmOMMUZ4++23m6bgF7/4Rfzd//7v/w7WVI444ojB8//zn//EP42hq98PzsTk5nGkkUYK0003XXjxxRfDZ599NjiTMETv1ZNadjcj448/fvjDH/4Q/vnPf4Znnnkm/PDDD929VbkuhJDqdJ0Yg/td7krwxrTUPT/3HezqWV39Xpst/Pvf/+4qatu/r9O8Xa17SodmmStw2XaxlwuKAoNPgbnmmiscdthhAWyk8P7774dNN900/Otf/xp8D2pyp0kmmSRssMEG4fXXXw/nnXdet553zTXXhIkmmihC4xxzzFF7j8knnzxcdtll8XdvvfVWWHnllVs+64477gijjjpq2H333cMtt9wyUNzq7xdeeOGw+OKLh//3//5f2HzzzcMiiywSFlhggXDXXXfFP4cccsjPft+tTGZedMMNN4RxxhknHHrooeHKK68c6Kpxxx03nHzyyfFDIoWvv/467LHHHuGee+7JfMrQi9aTWrabyxlnnDEce+yxsd6kACxPP/30cPbZZ7d7uyEef6ONNor14Kyzzmr5UVaXkF/96lfhgQceiL+abbbZfhZF/r0frd7H3MwNP/zw4b777msavfHZufdN8R599NEwzDDDDJSHuvusv/76YZttthnoV1999VU48cQTw+WXXx5/l+45zzzzDNKH+nLLLRf22WefWDYrrbTSQM/V3gjaGmkYnEFb4EOuGpTne++9F7baaqus+tJVWzQ401t3rwKXQ1rhcv+iQBMFANell14aG9effvopwuR4440XnYLvv/8+LLjggoPUOOYI/8c//jHsv//+4cMPPwxLLbVUziU/izPZZJPFRl36b7zxxrD33nvX3uPII48MCy200IDfLbHEEuHjjz9u+rx24PI3v/lNWGWVVcKtt94aTj311HDUUUdF7W6//faw6667hrXWWutnv287k21ckBr0ww8/fEBnly7/5S9/GdM4yiijxB+9++67YayxxgpAQQDGqcNq45E9GrUntWwnY9V36ccffwxPP/10GHvsscMEE0wQb6NOXHLJJe3ccojH9dE05phjNv2AapWAVnA52mijxbo/OOAS4KSPHmBTdc+88yussMIg6ZTqew6kJriUBq60dpN+6cN8scUWC59++ulgg0tA6aPPewo0G4P6pBzWXHPNwd5OJ7g0quGPtsOHqb7B/xdddNEudW/VFnV58WCIUOByMIhYblEU6I4CV111VZh44onD559/HpZffvnw5Zdfxsbq7rvvDsMNN1w47bTTwhlnnBF8ge+7775x2FmDDgR32WWXOOQH5jQ01113XTj66KOjE3LBBRfEe2oQt9tuu7DiiiuGJ598Mkw11VSxMX7nnXfCAQccEDsKX/w6EPd9+eWXI4g1hj/96U9hnXXWCSOMMELQcYvHQfCsE044ITburjc8tMwyy9RKwWWRN0P+/ubqcfdSAIdbb711TIv0GXLimiTnstXvp5566rDqqquG2267LQL6JptsEvUD6DfddFOE2PR7+fZ88Mdl9e9vvvkmOj00pon7yOezzz4bOMvcr/R76QXhO++8c3TIvv322+jE7rXXXuG1114LrRp0oPs///M/USt/v/HGGzH7tOAgK08ucqvy5sp6lms9Hzx98skn4cADDwx/+ctfIlDpYJXPSy+9FD9efLBIP7inqedsv/32cQoBKAP+4kgXgFDv6Ca9Pj7UR52/OnX//ff/TEvp3m233QLXUGdP/4MOOig8/vjjMW/N6o40tqqb//jHP6Imre5drWhXX311rI+er74nCOJa/v73v4/TDzbeeOPgQ4S7OcUUU8TLjRLQShl2lR4dO505VeoNPY444ohw8803x48ZH2nqjPd0wgknDPPPP398T5dddtmou/fb73faaaeo+dxzzx0143q5z4MPPtg0bdLq/sp/2GGHDY899lism0IjmFXhUjnMNNNMsQ77+Dv44IPjSIlrH3744Zg+4a9//WtwnfdNGadQhUt5rJt24v3X9swwwwwxbd43H1F+JrgOpI0++uhRA1N/9txzz/jvBJf33ntv1EMd5JT++c9/HqgdSXBZ/RAGXXSjozrt2kbnslUd9JAtt9wyrLHGGrGMPvjgg1jftQ+NcMnFlBfpBpQ083yjJltssUXTdlZdFtQd5aceeb+SebDkkksOlNcEl9WPVG2DNlLQdtGwVdvf2BaZKiIN3gH1QV3kgg6paVgFLrtDBeWaosBgUCABFwdDo55Cda6MDhOEajw1An6nQdN5cv8Ams7lzjvvjA0y0LrooosiWGmsNU4aNEGDovF3L4CoszruuOMicLqfRlBDXA1gc8cdd4w/0ply3VwPJHfYYYfYGQIa17/66qux0W0MyR2VfmnbcMMN471Suqaffvpw7rnnxstAnTx6hkAXwNDq99VhcY2yzkJHYbgZdHMHq8Pm4GnWWWeN9+cC6FSFVA5V9xB4updACwCp3KRRJ6djBd1p6KwVXMq78qEdTaohlTkoaVXe3BnAlbRSnikog3SfNPVAR5mcnQT24uvgDcvqIM1RBDjymuqCzuv444+PoJuCe4K0qpZca+61MvJs7spHH30UdJit6o78t6qb6lGrezfWsYceeii+F4bt6dcs/P3vf48fLt4Pf8CTv4Eg8Gr2rkiP33vnAJB6A5YEjrNpIQAqBfUY7Pj4Ep/7lZwnIGyOsLKkGRgDm+I3SxsIWHfddePtpdfHUwrN4NLvPdsHUKrDYNIzfBxpD0BmajPUH+VdnaNahUvlX3UugZz3H3T5kHWduq0+CdoSebviiivi+wxM3c+/04dU9V1TB9NQcF05Jrik7SuvvBKfQVPvr7o733zzxZ9V4dJHZbP2Sx300Ux34YsvvhgwqqCN8H4n55ITLC49xQfg1WFxH6at6g54pblQbVPcb/bZZx+ouia49OHp2XQBlpNOOumAttMoTau2v9oWgX1Aq72QT22W90V7ttpqqzV9XwblFwUuB0W9cm1RYBAUSI0TNw0E1QVf/5yS1GFrEMwj1FlotDU2OXCpkefoADlz/tKQWVfD4mnoLsEr2NU5CuAEbAJUDRbIqwuGj6accsoIZZxWzqDASeKock91clwDw2zARqclgEuubqvfN865bBwWr84T1EADEUHHwXXSCfqj05pzzjkHdBocOPNJE4D5+5xzzokQr+PRYHNJOUM6RuXUCi417lwDOmy77bbdKm8ADy51UJxIZSDtOnbgwznmjKX0JLiUdh8T3DmQksrfx4mO6/zzz49gsd5668V06Si5HGneGmeNFo1zLhPU6ezlT2cPYtUxzwSrzeqOztpzmtXNVveW/xS8E6lM09SCVK9THHooq4svvjj+iP5+poN2vfds5plnbpkeDhmo86EDrEAVqOQiynuCywsvvDDmX31Qd4EUpxlc0IObqMyqw+J+3ypt0s6p9u67j7JMHyit4NKH1iOPPBLOPPPMmD//lndwBDTUB2XgvfMueieroQqXjRX2ueeei/WF5txh7wNHF8C6t3fYojUfI2loObny2jPAntpA+VFfrr/++uiia2PAfDU0m3MpTrqfulSFy7/97W8t6yAX18exjw5tEz28Q/TxAaZMtQvpI07ejAIIdXDZrC4bTZp22mljHuUV8Pr46gouGzUXn0vq2e3AJeD3vgF8efR/6RCMxGjPBncocDm4FS33KwpkKpA6qzQ3MF02yyyzxEUhL7zwQhwuA5AatDSEZeGNRrs6Xyl14BowDVmjc5mAhmuhIU2NWldwmTr46sIaHSu4BXHcjFZwqUOUP26FDgZAAlyOjWE5jkwa0kwNPB2Sq+u5Oi9Q2+z37cAlNw5ICSAWZGloNfoCIEhzzNLvAaUhP8OKgML100wzzc9KOQcuwYOpCY3zWw3/uh/Xx9B2q/IG5uAyfWxwCEFwmocFFDiCXFtuXILLBNWGbEFAyivQsLiqcXWq4TYfPWAgLZRyTSNcKp/q4iz1Sp0x1NxV3VHPpbdZ3Wx178ZXTF0CAGkhlSFA0wfUP84WPbiDflYXrr322hi3WXp8dACzusC1VUeAiM7bcKegXqkryeFM1z7xxBNxwV4VLjlJrdLGGUwwCHzUmbRgrNWweFpgl8AsjRiYbkN/w9HqHsACPI2LyqpwqZyrC1doKr8cWHWl6qbKq3ZB/XT/1D416pfasPQRk9o2kLnffvv9LHrKQwI4v/Q+mfqg/qapNlW4lB8fD83aL/UUEKcPzeoD07B49WfV+9TBZbO6nNoz75t/V+eqt3IujTC9+eab8aM0TTtIz2iEy8a2v/qhy9WtjkJU8zSk5noXuMwEgRKtKDC4FUhQpcHmcJiHpkPiYoIxX6iGkHUQ1RXWYE2HCbZ0iBoODar4hoHARyNcamR1Hu3CpU6BO2moEVgYzuOeCL6AUyfSzLlMTlmddtw2DZ6VvIDz+eefj66axl4HSgONuS/8Vr9vBy65eoBL4AaZCpA6kQTcjYsMqnBpPqjruHNgXyeRXLCunEvgaO6osPbaa8chZnlN7gqNOWGtypub0h24TJ11+piQV2lJLrQPBYCWVtxW4bIKBlW45KBxw9VF4KMs01xGMCtuq7rDLQNzdXVTmba6d3IqU71K7jLoli/z4gRzTqUJCKmLQETefdRwP71vpg14f6S5WXoAALgEMYZMaQXUdfreTdMZAEqCfs9ObpUpE+qx4VTTMerg0gdHq7T5MPFBx4EEFemjwnNaOZeGPA19pgV1XNbNNtsslpUPrTRVojqsXH1Xc+ZcJnDSLqlHIM914FJ9BqVpGJzDaS6vua7en8Z3LQcuG+dcqj/eo1SPqnBpZKJVHdQe+HA95ZRT4qp9enEufUCnYXEfoMrQ/Hj1SN3UdtXBZbN2Ns2rVo5cciMOXOWunMvqnEtGg2enOsYZbtX2V+HSO6AcfFykaQB+po0VrzoSMLj6uQKXg0vJcp+iQJsKGE7VoAlpQQzHUrAYA1SZN2nelqAT4w4YNhIMK2lsDLG53hcuR0PIhUtDqYafNJbmBKZnpaxwU0CA+5sADqZ0HBpZDo30tXIukzujMzb8LegIdJIaNk6TDju5soYHAXCae6VT5vy0+n0jXBrqAkfcPA2wjrQ6TxC8WwijY+UOc4HBUeM8sNRpV+FS+kC8DwLgYi6VtOY4l/INzNPqcE4lOJF/aQGnIKZVeasz3YFLHSR45254nq2nfGwY6la2AF9dAAMCvThHtGsGlzpHnbD86NC5LPRwf86chSyt6o6ybwZznMJW9wZo1VCdtys/YMAQfdI6QUECEb/3oZAWxXjXfDS0gssEqspafdaxex+9wxZCNcKl4XPpojUQ8eGnzqfh5zRNwruuzgIc6a1LGyDw0SFv4gODNFTbCi61A+psAgll6kND4FpyTIXG0ZOkbRUu67bb8YEBloCvd138NPdRPTbioo4JNAOXYC+NWnQHLmlgEZvgXskx9fHgo7QKl0C2VR1U5tpPOkmL94/TqW4aDk9zLg0pyx/N0whKO3CZFuNIO+fVPPDU7rdyLqtw6YOI2ZDc8TSdp1nbX4VLH7Kpr/GRZHqGOgEqtTvt7qGZ09UVuMxRqcQpCgwhBcx3MdRaHVICHTpnnZ/AGePopUUuGhNAZ24X55L7lIbeDNFxY5rBJZjRqaUvZp2Boda0krtx6ERDyykxXysFDZLhJB38vPPOG9NSXaCT4plnmbZ/MTfLHK0UktOk47Vyurp4BOhKv44vDUO1+n2Cy+Teaiw5NSAHPOqswVL6PX10/BrYFOhm2E2n1djhAS+roQ2LG/5Lbohr3RuQ0ZPTxn1qtc+l1dkgovpsYGnlZ5rL1aq8uXIc0ORepI+DNCxuHqa8N865VD5p/0cus7z6WEl5kxc/BwnqImAAXs3gMmnpPobVqvUXtFjs1VXdSQt6ktvTWDdb3bvudUzzRBNQisPp5SqqS6YJpEVuCaqUm48HcyS7So8y46yljzv35wT62DBs7UOs6lyCPh9UdBBMC3FtKhsfTGkxhY6fiy4NdWnjDnuX0sbdnpMApdk+l/IGqtOHGigFSQkk0gIlaVt99dXjgrzG0NU+lz50LAgCT9onQKYeaY+SS5rmQKd7mxoD5uvetQTkaW5iNT3N5lzKI0fQeyVU4VJeW7VfRmLoWi1TrjgXG5QqU22DkSXvpXYsuf5p4Z32Ji3oaVaX1Ul1zMehoN3zod5su6i61eLSkLZ6kzY6t2r7G/e5BKNgOk2BSaMvrfYxHZRur8DloKhXri0KDCYFgBjwAENpi5rqrTVO5pEBL51Q46kjaYVmdzZe19hoXAFds9MmdBYA0/11UkMi6DgNPemU6r6ku/p9o16cq7RPXF16QSHdQS/nqZ3AgTFs1t3TOeSFnp7LMW3Mb1flnZvWNOcSANobEGA2lp+6o5MdlHL1kQEwadmoyaDWnVb3rtPBe6RsDdfWbbOivnNwaaGutbsVi3sDAw6kqSytgmd5b5Vx3dCjNNCHZn7fVdqAnDhc6NxADyDSWC4J1qpAnHvPxngA1jthsVLdu2sKAbAyRO+jpqdDV3XQ0DgnzwdzckUHZxp9RCgHddLHl4VTpidwMX0gDkpop+3XrpiaoT54V4eEY5nyUuByUEq1XFsUKAoUBTpYgSpcdvoG7R0sY59LWnIIZYyj1oknGPUl0TmeaYeI6lZEjdvQ9aU8F7jsS6VZ8lIUKAoUBSoKGPI1v9R8LXMiSygKUMAwvOkCnLTGVdlFoSGjgDmh5qOaNsPx5mBapNNXQ4HLvlqyJV9FgaJAUaAoUBQoChQFhoICBS6HgujlkUWBokBRoChQFCgKFAX6qgIFLvtqyZZ8FQUyFLCQw6KC6kIgK1NtoWIl9NCYfJ+R7KEWJa2qH2oJKA8ebAr057KUd4sC6xaS1f18sIlebhQVMFXFwqG67Z36ikQFLvtKSZZ8FAW6oYA96Oy9aDshqw6d3JH22nQ7q63tZ2iftMEdbK1hhXGrs6AH9zPT/ew5aCuYdIJJ7nNsc2IroHSEZe51nRrPFle2lWpXhyGdn+6WT2667NFqi5zGbXxyr++UeLbMMW/SKu12gm1qnn766QHn1KdrbcNj2690FGU79xzScdM2X7bh6YRw0kknxV0JtGPtBluPObnI1lB9NRS47KslW/JVFOhCAR24jbTTpsc2y7ZXnYnntuSwJ6DNnQV7uQ3u4AQTDoo99no62BrEGeF1Gxi3Sktfg0sfFDRodrZ9T5dLel53yyc3vX0FLoGl01/sS9pO6I1waesuWx7JcycEq+zBpT1O2w3pRCH7Y9p3uC+GApd9sVRLnooCGQpo1OzV5xQLJ6s4bSOdQZ0uB5U2ieZe/vrXv46bhNsT02bJttGwqa897Gx6bINeDqfNpsEKV8zwuusNsTvL2F6aNoq2r5x99gRnrNumg2MCdOzjZ/9BaWkctrNXnHjOQnbEn5M0pMMZzp7LRXC99LzyyivxvmBZ/pyuYaNoG9Q7BjHBpbSKn06w8G8dtnuCEJvc29jaXoXyYONozqUTaNzD7+xXaUW2fDaGuudLO11oxL21352Nx+09Z6N254ubrsBFtsob1MqDYAW4cnDSis2Ydbo2uxaHC0kzxwM61cVG21amOhHJ0Y721PMc5cStPPDAA6NGNvMHmjZgt9+foVH5twG1TcBpbn88Hx/SI+2eY2N5Wnge3dUpHaaQU57N0lmFS3VMfVG+numjiPOjDD2TA0d7+1U2u5/0KAcb9tsEWz1QN9WXyy67LGquHglO4lGOyrYaaOtDSBps62TPSUcFCnXpARCt9GlWf+ryazN2+2XS316R3h3HUNo70rvIRVNezXSx56s6Y39V+12mU3LU5WrgXKpTTvryvvjIpK13xKk6+++//4D8qj+Nrl2zOun88Wq9szl/Xf6lS76cjJPOOXdymPfLQQDeB21Ryk9jW+SIQ3u6pr0jHTqgXVp66aWb1u9q/hvfD+9Ds/tV4RL0+j830rtDr7QSvNl7MChwmtG8D/UoBS6HehGUBBQFho4C6UhGDiLI0LC2Gh7VcThFRSemEwAAjnjTgToSLZ3VreF3Xrbzs91Pp27DcZ0EsNO5i+toRuCiI9FJOA3joIMOClNNNVW8ty1S3CcFIGHfRoAKsJzIMsMMM8ROSmeoA9ao69TBklNAAJPhT6ff2KDe88XRQSe4dEIFJ8dZ2IL/O/mCG6SzddyavOu4dCI6ZNeDNaBlaoH8gG8wWg3JIWt8vmfIv43LnRACXACiE3Z0riBWx+5UFxAFaK644oroJsuT03OkDQCYygDwgRMtnSAE2J3wZP9Cx4SCcg51Gm6moXggKw2LH3PMMfEkIp0iaFB20u/jQ+fsiEDDr85jBtpcbz9zLxBquBIEyoNTk7oqz/Rh0SydqXx0wtJiioYPADAPbmkqHeCHViCgWb6BJC3o7RqnKalPhsVBjLrhBBMACzKVfXVoWL7UCeXmQ0CdA3o+hmyIXZceujXTB+Q1qz+N+TUf2geZvSm9Rz4CvYM+wJSZQxWctuMUqma6qDu2HlL3aSk/Pgrq4FKd8oEljeob2LbpvAMH0giG95BmNgJvhNO6Omket2koqd5Jf7P8p2F+H5fq9a233hrfYe87uFQ3m7VF2hPl6PhQwfuufLwvzeq39zsF7U41nT4ymt2vCofKBlh6l51Vr62QZh+Rzd4D7539L2nceCjG0OkRBu9TC1wOXj3L3YoCvUKBdNSezojjBcw4Uxq6uqCjAZBg0hF4ggb+22+/jU5CV3AJXHXKG220UewAdcrVYXGgAgZ16lxDIKRhr+7NCBhBsM6C4woOAJZjMLkD0pbOigZ8OlwQ6jSVBH2cQq6gjrEruAQSOhAdhYVN6ahFHbK0pOMEgZ/n0rDxGL1mzwfDHMLUWXNy5MExemCcK5Z+5yxg8KQjTxrQx1xZv0ubMwMIR+spD3lTTtKmg+XggD6gqvMEYty36pxLDp6FBgDVHnzyrWMH+KCM7gJnEmzSAzwlEEt1Snro0VV5ulerdFbhEgS6p/yAP5229AmcPulRd5vdT/2iXTreNJ0/777+rSz9DqgqA6cCVU/uAXTymxwxeXaNetwsPZ7XTB9pb1Z/wHs1v9ICqgCfU3C4lBxu9aE6LN4sHa73nqiL6fxyZ3BLWx1cgm8fS4L3Gvio296ndFwlMFSP0pGlqc3wnLo6qa5W6x1HsFn+Oe4+rtRx5cGlVTbSTgdpa9YW+dBtBoPN6rePlka4TO+HOt8VXPqokmftlpEUQVmBVm1rs/eAmyt+49G4vaIDyUhkgcsMkUqUokBfU4ALwA1J0KfzNOyqoUxnmsszqLSAhVNhyG2llVYaMPTLMXNco0npVbhMLk9yLqsLRlwPXnSeVbjkOFjYA244m2notdrw63TqJsAbipU27ogO0zAh0AAiHElHnaUh5Won0gwuU4fNnQUICbilUeepQ9bBgcPGAAxolUKz5/u54WoaCYbAddTyAQTpnhYucOO4vjr4dHa4vzlPADKdqcwJ5TwqWyCg8wLg4EDaE1xW55pW4RK4cCC5s66RD+6bDjKdvy2tykBZLLroolHv1BH7HWClD5joqjzFz0ln4/AhPapnmbuPOgP0mt2PC2llbqoH5smZ7qAeyjcnTT7UTR9MjfWFa5nOZPe86rB9s/TQv5k+reqPD5TqXL7pppsuuuoca/kUOMiNcNksHWDfh1f1w0fdSWVVrcPqt/qUPiDVOa6lsvY7HxreK/rVfYiKU1cn1atqveOGNnt/QJn4oAt8+SgF/GmxITAF93VtEYCswqAPLffxYdKsflfz3zjXtxEuq/dL9dI7X7cA6v3334/vcLP3ILWTPibBfl8LBS77WomW/BQFMhTgCoARrhZXLH1FG97RoaSw++67x2EojhdnpOpWAErzGrmJ/p0gIzl8CS4Nb6VhqmZwCYp0Kl9//XXsuH3tg0auVArpLGS/1+EIHA0ujvPYdTocSsOVYIETqJPiVJqLJ8gHJ9O8sypcepZ8gDEuhM7CPC+Ol2dwsaTR0Cp4MtRs+D5N5jcUz4nhnFS3F2n2fB2ddHHchASN0q9czJVMzhxo4M5yrBrh8tprrx3glnCSpd8wnPl1gIgGhkF9SHQFl5791FNPRcDXIYNUHR9dQIOPjASEylP6wFOaDlGFS/p2VZ7mEOaksxEuDWOblgBwBVMqfOR4ZrP7GaLkFqd6AJpNI0irxd3TMLP3wrQAzlw1+L9tu9QfgS4gG9A2S49ya6ZPq/qjHlfhUl00pO09Uj7eNa5+I1w2SweIVL/TlAnpNyXGAr4655IDJw2CEQCQ5N02XYOGAJf+QKsxqCd1dRJwVuHS9IZW7w/nnH7qoDZI25PgUt0Ex3VtkRENuqTpPeq9EQBw2ax+V8GuDi6b3S/VS+nh8JvXDJoFQO4DzYdKs/cgjXakD/yMZrtXRSlw2auKqyS2KDB4FDAviDNlCyIOoqDTMucMTHCe0qIX/9YxmKOnodTRGALVYQAZDb/OSscCBnUcvsq7gksga2GORRE6dLDBSQN1Oj731IGkkL70dTw6SvfX8ANAw6TAAgSYG2ahA5D1t7mbHBDQaE6UfFjQkjo7+QZl3BH3A4w6dGBII44i5wj0mbunQwZgHFmdl47b39IPTqrDqTSse7600NmQrM4fbssSAAAgAElEQVQ8HdOoo9GJ5sKlzkt6dVTmq4Jb9zZ0DdC50EAbfPmAcP9mziVdQAuI8Xzz83Ts5vK5PweMbp7BzbQIphk8gbiuytO8xZx0NsKl+gXODZlyfzltad5ns/v5QFAO5t2Z7qDucYsTXHI8acOVE7dxDhwd1Cf1AMCo++bYgstm6VE/m+nTqv6oZ1W49H75mOKm0lW9kQZQLy/eFe9hs3TQ2T18uJmSIh8bbrhh0zmX6m8atpdvefahkj5sElzXuW3agLo6aai9Wu/Sh2Kz9ydNW1CuySFNcOljt1lbJO3u6Y+0gD3vtvrbrH6nhVzy1QiXaYSn7n7VegnsOducTo6sjxnPMz+42XtAA4CePrwHT8veOXcpcNk5ZVFSUhToUQVAkyFgQ7ECRw9QAcwUzLUDk1ZqGq4FVpxBwTxEjbGO2HWpo7YAwRBeV3Bp+Mof7qdODPhZDMAZ0RFyd6pD9J5pCMrPU9CBgCF5kS7DdxwmHYZ0WMBiWEpnLeikdZyGu7lX4CB1dH7PdTTkav6ioTedig4m/U5n4TrP4yaaxC+AElDQODymY6t7vueYl2Xxk/z6YyWuYcc6uPQBoDOrOpzAxXO5pgIol18dLOBN5agMdXgcKFCU8u2a6rA4GAXqaegVaNLKYqA0hcE1yoZmH330US08+SDgJHdVnml1eVfpbNxPkGZ0BocpPaCJg90s34YnTUXwMSNIO4BOdZaL5p5WngOvxkBjLjBQkG7TNUyb4JA1Sw/96uAyOW7N6k9jfmmtjgumfNBWeXoXlRlH3b2AVJ0uwE69UO7eEXXmm2++iUOxyY2uvk/g0MenYEGZ9z59MHETve9c+rrgfayrkwC3Wu9c2+r9MefYVBDtU5pT7L1I00WatUWpTqkb6rH33XSZNAe4rn5X85F0SluUKXd1qu5+1XLSRoBw7Zdg4ZVFY9rBZu+BDxrz3JObXitoL/5hgcteXHgl6UWBQVEAqOl0G/ewBGKGbLkj1TmPngXcdLC2K2ncdkfDLxiezA2u4Yb6o0OzrQ5Isoq5WbCCVIdqXmb1ZKFWz5QnLoh71wXPtljAcF9j4Jhyaq3KbdwayTAhV9CQKve0WWj2fFADclxfdTxz9NORc2sNyYGGxtOUzNWjj5NAlJs/zfKfnqc8uLOmSrguBXM2LbIAliCuUYdmmuaUZ3fSCSLsFAD2ObXV9LS6HydbOTXWL+VjCJiT5H6NAZx4J3woeBYoM8zKDRZapadVWebWH/WTI29Y3PNd989//jOCHhfUh4N/t0rH6KOPHh1f8zXBZavgnpzZxveBU+rDLW1J1HiPrupkY/zc/NeltVVb5IMIGDeegNOsfnf1vjW7X/U6IOpjhZvuHUmhWbvG7TRHFKT2xVDgsi+WaslTUSBDAR2RIW+OjKHZEnqXAqkjNw+0hO4rYHqDjyyuYDMXCSSbU+eDixsGLM0t5D71h0AXQ75g0PQBAFUXSp3Mqw2cV/N2jUTkfKjl3bWzYhW47KzyKKkpCvSoAr60OW9D4wjGHs1oH3yYOY/mRVqwUUL3FeBWGi7nIBnObBbMnzXfz36kPsrM2+wvwbCvYV7TX6quXGP+S53MqxGG301v6MvvboHLvLpQYhUFigJFgaJAUaAoUBQoCmQoUOAyQ6QSpShQFCgKFAWKAkWBokBRIE+BApd5OpVYRYGiQFGgKFAUKAoUBYoCGQoUuMwQqUQpCtQpYNsPq2g7LZgTZVVoJ4VOTBN9OjFdnZimolV7b1MnlmEnpqnUq75Rr+wv2xiy4NI2HCby2q7Blhm2KLAHVeO5ou3JVGIXBXq3Av+7wuG9JgPDPHBGGOaDl4daep2GYu/ITgudmK5OTJNy68R0dWKailbtveWdWIadmKbeVq+y4NIJFmkPO/ts2Y7AHlMLL7xw3J+uhKJAf1SgwGV+qZfGumiVr0B+zFKvilb5CuTHLPVq0LXqEi5tDGqXfEvmbVhrLzA71jtloHq2Z35SSsyiQN9QoMBlfjmWxrpola9AfsxSr4pW+Qrkxyz1atC16hIuzSmzMarTH5xw4RQAJzc4scCRTl2d+JCfxBKzKNC7FChwmV9epbEuWuUrkB+z1KuiVb4C+TFLvRp0rbqES4+ongGaHulYMRvKNgvO87TprHM1BWcIWwBhM1Zn6vbn4Lxe7m/j0VT9WZOeyrvpHIOrDha4zC+10lgXrfIVyI9Z6lXRKl+B/JilXg26Vllw6Zi4pZZaKthVPp2/u9deezU9AkqyHCfnz6WXXhqHz53Lesopp4TTTjstzD333Pkp74MxH3744bDWWmu1PD+5D2a7I7LkTOrBVQcLXOYXaWmsi1b5CuTHLPWqaJWvQH7MUq8GXauWcDnLLLOEYYYZpulT/vGPf9SeiznvvPOGQw45JCywwALx2kceeSTC6YQTTljbse+///7x999++208v/W8884Le++9d1hmmWXi9RYU7bfffmGrrbYKiy66aLB63XD9zTffHM+ClUbHTn3++efhL3/5SxhuuOHC6KOPHofzt91229r0Tz/99OGoo46KQ/0WK7300kthnXXWied9Og5vxx13DBNMMEE4//zz4zOtjOfWTjnllBEKf/rpp/jvN998M6y++upNzwd1Zu2uu+4aAPp9990Xdt5555DgcqyxxgqHHXZYGHHEEcPzzz8f3d3vvvsuWNbv/FbhxhtvjHlzfrB7TDPNNPFZu+++ezyCrJ1gxT9tpptuulg2niXk6i8N0kofC7nMw7WoiwNLOx8Qg6q/I9W4uj5K1l133TDffPOFzTbbrNv6N+btiSeeKHDZTqUZTHFLY50vZNGqaJWvQH7MUq+KVvkK5MdsVq9awmVX514a9v73v/89UCoA2vzzzx+dTgBmWH3OOecMda7RmGOOGWHCNdNOO23YZJNN4lxOPzvmmGPCKKOMEn8GtrbZZpsA1oAnmJlqqqniv515CtTOOuusCEonn3xyeOaZZ8KJJ54YHBD/9ttvD5TG2WefPTqphupfeOGFeN1qq60WNthgg7gafr311guTTTZZHNaXdoDsLNDTTz89HHrooREqgaHzaDfeeONgFX1duOeee8Lll18eF0V53pprrhnvybmUPlAJnvfZZ58IkuDKecHyCiLd3+H20jnGGGME5+BK6zfffBPvkcL6668fZp111p8lQfmB4xScHw2YQWY6F7cd/UG/+HvssUc48MADI+z++c9/DgcddFAE/aeffnqQ9T/uuOPC9ddfH3WmAXg1jaA7+tflbfPNNy9wmd9uDLaYpWPLl7JoVbTKVyA/ZqlXRat8BfJjdgsuwd2www7b9Cm77bZb7bxBcAeMtt566whsOnTz3OrgctNNNw3LLbfcgPmbIJIDxrVMczrN0QRgY489duCKcjmBh+vEB5qg8Oijjw577rlnmGeeeWKaQR330s8bA7g84YQT4sIk4d57740OIaesGVy69y233BIefPDBGFeawOMRRxwRgagxjD/++OHaa68Nc8wxR/zVH/7wh/DPf/4zxpVvTiCI5iBKD2Dkgs4222wxX1xKf8C2oVxpPPLII8NGG20UdaVDCu7XCJePPvpoBO4qXIJZ+ahel6v/0ksvHUGcEwt2Lebi8AJBTuatt946yPor12Zw2a7+dXXr/fffj3A9OKZmlGHxQW+A8u8wZGJ2YofbiWmifiemqxPTVLRq713txDLsxDT1tnqVNecSJNUNj7/zzju1tcgG6x988EHYfvvt41D2SiutFBZbbLFauDz44IOjQ7jGGmvEe00++eQRngDe2muvHX8GNjhjnDtOKFfRfQEYJ7MKl8BnwQUXjNddffXVEYYMIdfBJZcsgShI5MZV4RKAHHvssQOcSw6poXEuJKcVvLWCS46j6QEJZOj42WefxSFeoOR3d99994CkOUEBoEk359UQrqHhBJc33HBDBDk/k2dTAlLg4o433ng/y+a7774bh/urcOl5xx9//ICftaP/4osvHlIalDFtQXYVLgdV/ypccnNnmmmmAc5lu/rX5c2Cnu7ApSkL/lTDHw64o71WdCjGLpuo14vfiZ1IJ6apt3VsQ/FVi4/uxDLsxDQVrdqrqZ1Yht1yLlO2DUkaAm0MzYbFOXng0EMBkPmSHMKqcwnquEjmO3IWOZWGxV3LDfMzUGW427C6IXbuZFdw6VmGUb/++uvwt7/9LWy44YZx6DkXLmeYYYaYZnkz9xFQpWHxduGGZpxTDu6zzz4bbr/99pgeQ9KGtC+44IL4jNtuuy2cccYZUQ9D+FxJw8FbbLFFHHLn4BpSbwWX3GJzZKvBaUqeXQeX3dHfMH5XcDmo+psC4aNlhx12iM8yDzcNi7erP6hvrFs0TXDpo8Y8YB8I9nOlM5eck+wDRVm89tprTd/+4lzmN4yd2CiWji2//IpWRav2FMiP3YltQyemqbe9g1nOJfCxkEMYfvjhw+9+97u4EAZ41Z3QY5gVIAIk8S1k0VlzJdNKXUPK5gSaMwm2gKVgjqB5hoZdp5hiiuiYvvLKK3GuohXqzeCSm8llBDc2egd2L7/88gD3MxcuLdYBchYEffnll2GkkUbqEi4PP/zwCLJ1wZxE8z7lgxsJFi3okR/D2tItmEZgSBxMARz6WjQkH67jSlbhkqtbHRbPeZXNubQIyHSA7uhvLmgdXCpT9QAkD6r+6ovpFvIN8j788MMu4bKV/o11y9SCVAc52txlDriPHJqkDybTH+68887oLjcLBS5zat1/45TGumiVr0B+zFKvilb5CuTHLPVq0LXKgsvGxwBCLo8/hokbQ1oZbRgRGAFFLprV2c2CFeBWe6cVzOIZRjbns25BTt19gIHV2ODWYg7XgVE/bwxAiJvYLICNusVKdfG7egYwt0rbkHhj4JhZvf3kk08OWHFOP2BtSNvvDeV+/PHH+aXdjZidpL+dACzksll/TuhK/7q85dy3qzgFLrtS6P9+XxrrolW+AvkxS70qWuUrkB+z1KtB1yoLLi3ISc4l2ONYgjcuHCiqC4Z2DWtz5iaaaKK4vZBFPUMyJLisPgegWizTGMxndOrQ4Ag98YzBkc4hfY/+pn+By/waVRrrolW+AvkxS70qWuUrkB+z1KtB1yoLLuvmXNrb0JCi4dxmAZCmU2iq/85PdnsxuXyGs3Mdx/buXmJ3pUB/07/AZVc14v9+XxrrolW+AvkxS70qWuUrkB+z1KtB1yoLLrmP5sIJ5gGal2ihSjm+ML8ASsy+p0CBy/wyLY110SpfgfyYpV4VrfIVyI9Z6tWga9USLs07tKDB37YCsuDBFjjjjjtu3IvSiufceXH5SS0xiwK9Q4ECl/nlVBrrolW+AvkxS70qWuUrkB+z1KtB16olXFq1bD5hCj/88EM8djGFZlsR5SerxCwK9F4FClzml11prItW+Qrkxyz1qmiVr0B+zFKvBl2rlnBprqVTY2wubnuYUUcdNa76tp2NPRn93WrOZX7ySsyiQO9TwFZaTlPqtNCJDWMnpkm5dWK6OjFNRav23vJOLMNOTFOpV323XnV5trgO1GpxR0E6L9zm29XtgtqTpsQuCvQdBQpc5pdl6diKVvkK5Mcs9apola9AfsxSrwZdqy7hknu55ZZbxj0qbYTeiU5NvgwlZlFg8CnQm+By2rV2DaNNOv3gy3y5U1GgKFAUKAoMsgIPH7JB+Ol/f8y6TydCb7M0dQmXVoc78cb+lk6ZMe8yhaWXXros6MmqEiVSX1SgwGVfLNWSp6JAUaAo0HMK9Eu4fPDBByNQNgsFLnuuApYndZ4CBS47r0xKiooCRYGiQG9SoF/CZW8qoJLWokBPK1DgsqcVL88rChQFigJ9S4ECl4NQns4Yd764Yxltb1TCfxVYeeWVw0033dTtzeidkOT88brz3YvGQ16BApdDXuPyhKJAUaAo0JcVKHDZzdLdZZdd4obrp5xySjjttNPC3HPP3c079b3LHn744bgRvROP6sJFF10Uz2S/5ZZban9//fXXR2i/5JJL+p44vSBHBS57QSGVJBYFigJFgQ5WoMBlNwvHavOllloqTDjhhAPB5fTTTx9XoX///ffxFCAunBOAppxyynDCCSeEMcccMy4Y2mijjcIYY4wRjj322Ljv5m9+85tw7733hplmmimeJe7koH333Tf+/KyzzgrjjTde+OijjwLH9O23365N+bnnnhtdw0svvTSsu+66Yb755ovPrEvPdtttFyaeeOKw4447hgkmmCCcf/75YdFFFw0777xzdB9/8YtfhMsvvzwcffTR4e6774575znf3P2efvrpcPbZZ8drJp988pj+I488MgDDVnC50047hTXXXDMupvKMjTfeOCyzzDIxL/K73377xXuAy9tvvz3+7Q9nuFGD0UYbrTZfiy22WNh9993DyCOPHJ577rmwwQYbtFXKU001VTjuuONi+b322mth++23j9fLL02mmWaauA+qZ9gTVRktscQSMc6NN94YDjjggLDVVltFLZWdDfqdBLXkkkvGub5+L6/KUdr8TP6cGtUs1Ok/3HDDxW20HAjw61//Olx11VXh8MMPbyuvdZELXA6yhOUGRYGiQFGgXyvQr+ESGI0wwggDVYBPP/00nHPOOeGBBx6orRwg8cILLwxzzjlnmHHGGQeCy9lnnz06mqDohRdeCPvvv39YbbXVwl577RVXp4M3gPbOO+9Ed07c6667LgLa6quvHsHjjTfeCJtttll8BugArKBkjz32iGCz0korNXX9wNnpp58ettlmm7DwwgvH6+vSA2wmmWSS4Iz1ySabLKZl8cUXj1AH+n73u99FgLIH6KOPPhpWWGGFOFQtX0899VQETQC74YYbhjXWWCNODwBZreBypJFGivAIfq+++uqYV3uNjjLKKGGTTTaJ119wwQXh2muvjUD+4osvxv1I6zQ4+OCDa/N14oknRui74YYbwqmnnhq23Xbb8MQTT0S9wDyAbQx77713UO6CMgWSANozXn755fhzZeR6W1gpU+WlLP2c1srlpJNOCob1ATu93Fc+AKt/r7jiimGsscaKgH3ffffFDwt1UB632GKLAOzqQp3+4HL55ZcPf/nLX8K8884b4VVZDWoocDmoCpbriwJFgaJA/1agX8Ml95ETVRdsVQT0OFeNASiCATDVDC7B41xzzRUv5UYCQ7DqntNNN12YY445wiuvvBJh5OSTT45wwAXlFAKE4YcfPtxxxx1x/01/gzoOKJdq5plnHnDvxrQBtzq4rEsPV7MRLg3vc8m+/vrr6IAC8A8//LAWLjmagMY10uSkI/fsaljccPiZZ54Z3dlll102ApLAvQObnMxxxhknwhot/F2nAaCryxfQ5S66xhD8888/P0AmrjFgbAwALZ0nr0y5pPJV3aIKRCpLDi3XWT1wVKgyAsWe6Y88bbrpprFMudvSudxyy8U4QBPUg/0///nPMb+C37n/EUcc0RZcSitHmUOqvAbHfq0FLvt3p1ByXxQoChQFBlWBfg2Xf/vb3yIQGap1Oo+O3RAmV5I7BTBAQGMw5Mk5NPzaDC4NqyYX6Z577olHTa699trRIeRYma/J+QKXKS7QA5fAU7oSXD700ENx/uJ7770Xk/LVV19FF7QuVOFyn332iUPsXL+69FThEkhx0Til0gie/ftXv/pVhCTAmJxLEAiUP/744+g2gvQvv/wygmU7cEk7OtBFkPY0fAzuTSngkh5//PGhTgPpqMvX/fffH7beeuuwwAILREjlzBoeF+Snbn6s7anSCU11Zepa8McN5QpzbIEiiOTAPvPMM9Ed9fMElxxuz1ZfwCP9ElwaPl911VV/5o67tzKvC1XnMunPuUzPMEWA49wuXBqa96cxtHufQW2Icq6v29S2bKKeo1yJUxQoChQFelaBfguXIAMgcQSTc2Y4dtppp40gAHLABChsDNw6Q6ucySqIgElnk3PH6qDHNX5+5ZVXxmFbwGgItyu45B6+++67Ydddd41Dp/bhTGluTBs4Ndy+ww47RBD69ttvm8LlDDPMEJ1H7pt5g4bEDddyFTluHEPQBYgswDnooIOic0eba665Jub9s88+i2kC5OA4Fy7NDeXcAlowZqjYPQAXJxdUGio3PC+NALNRA25wnc7Oi5cmQ+og7tZbb43zRgVD/ZdddtlAZeq5dBOqZcpdVa4cxjq4NHzPwTT9wDOlV9n4uxVcmkqgHihL7ihA5YhyH+sCuG7Uvxlc+jjggvuoUc+lB5CCZ6dRma9b58in5xbnsmcb4fK0okBRoCjQ1xTot3CpIIGTDhoQ6uAtauEKgjIuCUfTEHFjMGTNfdRRW8ySVosDEB0zB64OesAbN81iFkPN4447bgQ/w6MApplzyTk89NBDo6vqD+AFOnUBSIArw/1gwnOaOZfgljMLoDiP5kNyK4GItAFT2gAvTqmhXWnnKopjmN6zxPv888+j08iJBVkgtRnA0GuWWWaJ7i+3eIoppoiLWsCm6+hnONu8TE6e5wD/Rg18GNTpbJELOFaWdKBJGvLOeYGrcGnqw2GHHRbMo22ES44rZ5IWFv/QxfO4mEC1GVy6xgeM+aB0cA1XmrMJ6OtCnf7N4NI8WfM+PUOZqJc+IMyRNSXhzjvvbLl4qMBlTi0pcYoCRYGiQFGgmQL9Gi7BoS2FDJ0Khnm5RxzJqaeeOg5xNuvsOWuPPfZYXK3cTgCwQI7jp+MHf82e0XhfQ9yA0PXcRG5fNXzxxRdxIQoA5frlApV0AI9qAFh+lobi/c7Qq7mYafg4/cyqbDAFUv0O1AErQNMYbrvttvDss8/GvKdnmkdqoVOzFfDVe1Q1aKX7RBNNFEYcccToXg7pAA4BMgjmFNJDXcoJdLBISDq70qxO/5xntBunwGW7ipX4RYGiQFGgKFBVoF/DZRKCY2crl1wgcB14MVTM2RoawXMBZDUAv2Z7R/Z0GgEjt68xmJf41ltv9XRyesXzOkWzApe9orqURBYFigJFgY5VoF/Dpbly5sZxjqrBkGoOaHLGuHQlFAX6kgIFLvtSaZa8FAWKAkWBnlegX8OlBT2GMa3arg5Nm6OX9jzs+SIpTywKDF0FClwOXf3L04sCRYGiQG9XoN/CpXmJVuA+/vjjtVux9PaCLekvCnRXgQKX3VWuXFcUKAoUBYoCFOi3cCnzttOxmMfq2uqCFqfq5C6yKdWoKNDXFChw2ddKtOSnKFAUKAr0rAL9Gi6bndCTtm3p2aIoTysKdIYCvQkuh7ZidRu7D+00eX4npqsT01S0aq+2dmIZdmKaSr3qu/VqmFlnnfWnrrJn2yErxRuD86L/85//dHV5+X1RoE8qUOAyv1hLx1a0ylcgP2apV0WrfAXyY5Z6NehaZcFl/mNKzKJA/1GgwGV+WZfGumiVr0B+zFKvilb5CuTH7Kl6NfwII4Zhh/1l+M+Xn2clrqfSlZWY/z9SszQ1hUun6zgWz/GLjiq0sKcx5G5F1E5CS9yiQG9RoMBlfkl1YqMo9Z2Yrk5MU9Eqv64XrYpWuQr0S7i09ZDzm52P7YQesNkYVl555bIVUW4tKvH6nAIFLvOLtABT0SpfgfyYpV4VrfIVyI/ZU/WqX8JltRgc7+jM6hSA57777hvPsHZWdglFgf6oQIHL/FLvqcY6P0X/jdmJ6erENBWt2qtZnViGnZim/l6v+j1cPvzww+G+++4LO+20U3Baz/bbbx83Ve9qtfgKK6wQHn300XgGORe0E07pkWZQ0HhGeHtNR4ldVWDeeecN7777bnjttde6JcxCCy0Uzxv/17/+1a3rh9ZFBS7zlS8dW9EqX4H8mKVeFa3yFciP2VP1qt/D5fXXXx/GG2+84EzuEUYYIZbQs88+GzbddNPw3Xff1ZYYoNx///2DeZkPPvhgWGuttcKrr76aX7pDKOZtt90WDj/88I45W3wIZbNHb3vVVVeFu+++Oxx//PG1z73ooovi+fLNznNXvy6++OJwySWX9Gi6B/VhBS7zFeypxjo/RcW5LFq1q0B+/E6s752YpuJc9sMFPdXXyGIecDD11FPHTdO33HLL6P61CoDhiiuuCOeff37gfDbCpW2MzNn8xS9+ES6//PJw9NFHhznmmCMcdthhwVnkzz//fDwRyO/PPffcMPnkk4dPPvkk7LLLLuG5556LP5tyyinD999/H4fn//rXv4azzz47xp9mmmliOm36bkHSAQccEBZffPHw5Zdfxnsb0n/99dfDCSecEMYcc8x434022ihut3TcccfFe3LRpp9++ujOGvp3j1FGGSW6to1BvKOOOipe99vf/ja6cOuss05MX+Mz3nnnnXDHHXfE+/s94P7pp5/iv998882w+uqrxzSdddZZEeg/+uijqMPbb79dKzcduMKXXnppMH3B4qvNNtssPuONN96IeZB++q+55pph4oknDjvuuGOYYIIJYtksuuiioa4swKLGiMMrD08//XTU1zXKwlGgtqhSzq3gktvtuT/++GMsb2fUL7PMMjEvf//738N+++0X7wEub7/99vi3P8qzUYPRRhutVufxxx8/nHrqqWHccceNZbn++uuH9957L78n6GbMApf5wpWOrWiVr0B+zFKvilb5CuTH7Kl61W+dS6vFq6vEDYULIAoQgQSdeV3gVnI2n3nmmYHgctRRR40gATR+97vfRQicZ555wo033hihEnTss88+8f8ACSxtuOGGYdddd43OKZjaYost4v9XXXXVMPPMM4dFFlkkXHfddWGMMcaI8Ms1BUDuDXK5ak4ZWmONNeLPAM+www4bwQo8gT6gBqCA1MEHHxz/DXD8nBZnnHHGz+aepnzPPvvs4ZRTTolQ5NQizzZ9YK+99hroGcDOpvTydfrpp0cwBpWg+qSTToqaSBsoBLR77LFHBOWVVlqpVmdg5o97bbPNNmHhhReOEOcZL7/8cjjiiCPCiSeeGOET+E4yySRhvfXWC5NNNll0CkF3XVmYzm53Z2gAACAASURBVGBaAwiWr6eeeiqCJoBVFnSk+RJLLNESLgG79Hn+1VdfHcv2mGOOiaC+ySabxOvN57322msjkL/44oth8803D4cccshAGiiTOp3dZ4oppojgryzlnZ4p+JgA7NUg7+rooIQCl/nq9VRjnZ+i/8bsxHR1YpqKVu3VrE4sw05MU3+vV/0WLrlfdVsQpdds+eWXr10tDig4X4DRsHmjc8ld9HvD7ECRGyae5/m/fwM2cGiTdnP5ABvnCnRx8vyM6yWACZAJLO69997oqHEiAR6YWXLJJQe4Zc5J33vvvcMDDzwQXcLpppsuOqavvPJKdE/B5Nxzzx1++OGHeB8O35577hnhaP75549pagzS6tmmAgjSAAzrnrHBBhvE9LqnYWKAI650AlgwuMMOO0So44By5cBzunfjs1vBpWdxeUHyBx98EL799tuB4FJeG8viww8/jHNlG+GSPrR0jTT5SOCUdjUsLp9nnnlmGH300cOyyy4b1BuBOynfPlKAP4hWZ/yd3N2qBuC5TucJJ5wwfmxI85VXXhmvrQag6qOjGi688MI4j3hQQoHLfPVKx1a0ylcgP2apV0WrfAXyY/ZUveq3cFktChABUDiVXCLg1Cxwx3TygAik1Q2Lc844VHPOOWdcHMSxA5lAJwVgtMoqq0QIO/bYY2M8Q5/cK1DC6RNABVDkiN1www3RxeOwccIef/zxMOmkk0anTQAUQHXttdeO7p3/+z1oBE+nnXZahCcBQLkXCJplllliWuoCuDScDowEkHjggQfWPiPB5YorrhgBko7bbbddzEOCS26l4fI0tGshFBe0LlThkts700wzDXAu0zM4rspNHpNzKY80pX9jWVigo8wSXMq/dH788cfRbQTHphioE+3A5Ywzzhi1pr0g7ba6AqyccM4ql5TL7COgUQPpqNNZ/VDWwNWQPXAULwUfD2mucPoZh7ydRV0+aPxpDLPNNlt+i9VDMXuqYWwnO52YJunvxHR1YpqKVu3U9lKv2lGrE+t7T6Wp38OlIUZz804++eTopoESC3rMbasLIBCM+L14CS45lmONNVbs1DlZFvtwqbh34OCcc86J8yEtugFE77//fnTbABMXkuNobiIwAH5+BoDM6wN2hsUb4dL9QKN4Y489dpw7alic6wlAQLB5mUDGvMkqXCYY5d6a02d4vB24rHsGTcBZK7hUsa2+NuzPkVt66aUHuH2Nzzdf1ZA+t1Pe6ZWGxRvhkp5AzjxSuhgSNwRfVxZ0Ouigg6ILCwCvueaaAA4/++yzmCYAB1Rz4ZJ23GF1BwSqB+7BcVavQKWhch8Z0ggwGzXg8NbBJb18RPjo4FKCZR8bKYBP964GcX1YcD1BvTrLUQWwnHN7u5rn2moFfHEu87uQnmqs81P035idmK5OTFPRqr2a1Yll2Ilp6u/1qt/DJfgzz9JcO3BhmNVwrUUiYLMuAETQYjgZXIIYrtcMM8wQlltuudiJcyHBEOgBGeZKmg8ogE6AMMwww0SwAyN+tu2220YHlaNomNy8SfPngGEjXHLIACwA45hxxyws4fABqwUWWCD+3zCwtIBBw9XJuZQOYGo4HGw020qpmXNZ9wwQDaqawaWV7FxGczFBrT9cUHmrC4Bot912iwuZQJG8NMIlrdzT36DK8DTn0fQFzmVdWXBKlRN96CaOsvYsZWaREKeReyweQDdkXRcAO+fXvFHD/pxv5Qo21QsOphXl6oohbc8xD7NRA05vHVwCQ2VqmgW9lCMg7ir4yLANknmf8sINT9trAdI777wzwmqzUOCyK4X/7/elYyta5SuQH7PUq6JVvgL5MXuqXvVruEwupNXVBBc4av7NSWoGPVtvvXWEN+5is8AJ42JWV/Z6nnmQTz75ZITJFMBf4wrgqaaaKl7/6aefdllruFnigqoUzKf0f8AMLoBZ9ZnigSFgaw4n1xR8NAYgzaGtCznPaJZ4ji1HVRpBqXRUwxdffBFBH1Bx5potrmq8v7w2DgnXlQV4B2zV7ab8bOSRR45uKUj1O9DdlTbVZ/ow8VHQbAV8Nb1VDVoVsnSpD4899thAZdhl5ehmhAKX+cL1VGOdn6L/xuzEdHVimopW7dWsTizDTkxTf69X/RouFb55kFwu2+JwrWyRA8KsFrbgpi5w0rg/Vnp3wv6W7TUN/41tDiiXjWNrFTMo4lI2hieeeCK89dZb3XlE9jW0bhzaBX7N9o7MvvFgijg0tRlMWWj7NgUu8yUrHVvRKl+B/JilXhWt8hXIj9lT9arfw6X5jdxKiyVAo4UdHDPDmK0CF9KCnkY3ML+Ih25M2yQZipWHEooCjQoUuMyvEz3VWOen6L8xOzFdnZimolV7NasTy7AT09Tf61W/h8v0WhmaNrxpz8MSigL9XYECl/k1oHRsRat8BfJjlnpVtMpXID9mT9Wrfg+XFmNYOQwsBYs5LLKxUKaEokB/VaDAZX7J91RjnZ+i4lwWrdpVID9+J9b3TkxTcS77+fGPVtECy7Q10EQTTRTfMquhcxbT5L+SJWZRoPcoUOAyv6xKx1a0ylcgP2apV0WrfAXyY/ZUverXzqWFGrahsTejrYIEK8FtBm4rl05ZUJJfbUrMosDgUaDAZb6OPdVY56eoOJdFq3YVyI/fifW9E9PU353L/BrV+9qrYWadddafWmUwbUVkYYtTbmw9Y4Nrp56ATdBZQlGgPypQ4DK/1EvHVrTKVyA/ZqlXRat8BfJjlno16Fp1CZcecfXVVwcrpwUbatsA2xC5M6FLKAr0VwUKXOaXfGmsi1b5CuTHLPWqaJWvQH7MnqxX6+x/chht7HHzE9eNmGfsuE747uuvunFl15c00yoLLkcdddR4DrgjIDmZjs1z0gs3s4SiQH9VoMBlfsn3ZGOdn6qyFVHRqh0F8uN2Yn3vxDRRtBPT1ZNp6tdwmf9KlZhFgf6jQIHL/LLuycY6P1WlYytataNAftxOrO+dmKYClyH0S7h0Jrgh8GbBud02VC+hKNAfFShwmV/qpWMrWuUrkB+z1KuiVb4C+TF7sl71S7i0EtwZ0M3CyiuvHM/lbhZWWGGF8Oijj4a55por3HTTTfEM6qEdnA0OChrP1h7a6erNz5933nnDu+++G1577bVuZWOhhRYKL730Uq+bZlHgMr+4e7Kxzk9VcS6LVu0okB+3E+t7J6apOJf91LlsfJVGHHHEMP/884fHHnssfPjhhy3fNEC5//77B+7mgw8+GNZaa62OOGP8tttuC4cffnjZQim/newy5lVXXRXPn7eLQF246KKLwnnnnddU8+uvvz5cfPHF4ZJLLunyWZ0UocBlfmmUjq1ola9AfsxSr4pW+Qrkx+zJetUvncvFFlssAuIvf/nLcP/994d55plngJMJBo4++uimpQUYrrjiinD++ecHw+uNcLnzzjsHzqezyp304162NzrssMMCiH3++efDn/70p/j7c889N55r/sknn4RddtklPPfcc/FnU045Zfj+++/DoYceGvfidN65+NNMM008z9w+nHfddVc44IAD4obvX375Zbz3vvvuG15//fVwwgknhDHHHDPed6ONNgojjTRSOO644+I9LVaafvrpA6fTiUTuMcooo4Ttt99+oDyLd9RRR8XrbDbPhVtnnXVi+hqf8c4774Q77rgj3t/vX3311bgC37/ffPPNsPrqq8c0nXXWWWG88cYLH330UdTh7bffrtWaDlxh57yvu+66Yb755gubbbZZfMYbb7wR8yD99F9zzTXDxBNPHHbccccwwQQTxLKxSKuuLMCiF4zDKw9PP/101Nc1yuKbb74JRx55ZFDOreByp512is/98ccfY3lvvPHGA3YZsDn/fvvtF++hPt1+++3xb3+UZ6MGo402Wq3O9mJ1YpTjSZXl+uuvH9577738lqSbMQtc5gvXk411fqqKc1m0akeB/LidWN87MU3FueynziVAsVIcNA033HDxzfrggw/C2GOPHeENDDYL3MpNN900PPPMMwPBpXsCCaBhiyMQCFxvvPHGCJWgY5999on/B0hgacMNNwy77rprGGGEESJMbbHFFvH/q666aph55pnDIossEq677rowxhhjxP03QTEAcm+Qy1UbZ5xx4l6dfgZ4DPkDK/AE+oAagAJSBx98cPw3wPHze+65J5xxxhnhggsuGCjLs88+e1w9D4peeOGF+OzVVlst7LXXXgM9A9jZG1S+Tj/99AjGoBJUn3TSSVETaQOFgHaPPfaIWq+00kq1UgMzf9xrm222CQsvvHCEOM94+eWXwxFHHBFOPPHECJ/Ad5JJJgnrrbdemGyyyaJTCLrrysJ0BtMaQLB8OU8eaAJYZUFHmi+xxBIt4RKwS5/n29JK2R5zzDER1DfZZJN4PU2vvfbaCOQvvvhi3JngkEMOGUgDZVKns/tMMcUUEfyVpbzTMwUfE4C9GuRdHR2UUOAyX73SsRWt8hXIj1nqVdEqX4H8mD1Zr/qlc6nzBChrr712BMS33norrLLKKrEDn3vuuSNccMUaA6DgfAFGm643OpfcRb//+uuvIyhyw8QDs/7v34ANHP7nP/+Jc/kAG+cKdHHy/IzrJYAJkCld9957b3TUOJEAD8wsueSSA9yyhx56KOy9997hgQceiC7hdNNNFyH5lVdeie4pmJS3H374Id6Hw7fnnntGODIlQJoag7R6tqkAgjQAw7pnONlIet3TnFaAI650AlgwuMMOO0So44By5cBzunfjs1vBpWdxeUGyj4Jvv/12ILiU18ayMOWhDi7pQ0vXSJOPBE5pV8Pi8nnmmWeG0UcfPSy77LJh+eWXj9ngTsq3/VKBP4hWZ/yd3N2qBuC5TucJJ5wwfmxI85VXXhmvrQag6qOjGi688MK4pdaghAKX+er1ZGOdn6riXBat2lEgP24n1vdOTBNFOzFdPZmmfguX6dhHEKSjN+QIhpZeeuk4ZFy3MIY7ppMHRCCtblicc8ahmnPOOePemRw7kAl0UgBGYBacHHvssTGeoU/uFSjh9AmgAihyxG644Ybo4nHYOGGPP/54mHTSSaPTJgAKoAqYpcH//R40gqfTTjstwpMAoNwLBM0yyywxLXUBXBpOB0YCSDzwwANrn5HgcsUVV4wACUC32267mIcEl9xKw+VpaNdCKC5oXajCJbd3pplmGuBcpmdwXA0Xy2NyLuWRpvRvLAsLdJRZci7lXzrtDMAlVCdMMQCW7cDljDPOGLWmvSDtN998cwRWUwM4q1xSLrOPgEYNpKNOZ/VDWQNXQ/bAUbwUfDxwvKuBQ97Ooi4fNP40htlmmy2/x+mhmD3ZMOZmqRPTVDq23NL7b7xOLMNOTFPRqtSrdhTot3BpFbBO3RA3QAGNhlIBSTO4TEdGAtFnn312AFxyLMcaa6zYqXOyLPbhUgFXcHDOOefE+ZAW3QAipwBx2wATF5LjaG4iMAB+fgaAzOsDdobFG+HS/UCjeIbzLSwxLM71BCDyY14mkDFvsgqXCUbNOTWnz/B4O3BZ9wyagLNWcKnBpLthf44ckE9uX+PzzVc1pM/tlHd6pWHxRrikJ5BTbnRRjobg68qCTgcddFB0YQHgNddcE8Ch3QGkCcAB1Vy4pB13GNCCQPXAPTjOJ598coRKQ+U+MqQRYDZq4KOmDi7p5SPCRweXUt30sZEC+HTvahDXhwXXE9SrszRW1znnCy64YJzn2moFfHEu85vQAgFFq3wF8mOWelW0ylcgP2ZP1qt+C5etiqMZXLoGIIIWw8lcMBDD9ZphhhnCcsstFztxLiQYAj0gw1xJ8wEF0AkQ7LMJ7MCIn2277bZxiJejaJjcvEnz54BhI1xyyAAsAOOYcccsLOHwAasFFlgg/t8wsLSAQcPVybmUDmBqOBxsNNtKqZlzWfcMEA2qmsGllewg3lxMUOsPF1Te6gIg2m233eJCJlAkL41wSSv39DeoMjzNeTR9gXNZVxacUuVEH7qJw7n2LGVmOgSnkXssHkA3ZF0XADvn17xRw/7mRypXsKlecDCtKFdXDGl7jnmYjRpweuvgEhgqU9Ms6KUcAXFXwUeGbZDM+5QXbniq04D0zjvvjLDaLBS47Erh//t9TzbW+akqblzRqh0F8uN2Yn3vxDRRtBPT1ZNp6pdwabELaGkWuFCApi5svfXWEd64i80CJ4yLWV3ZCxTMg3zyyScjTKYA/hpXAE811VTx+k8//bTLt56bJS6oSsF8Sv/nxoELYFZ9pnhgCNiaw8k1BR+NAUhzaOtCzjOaJZ5jy1GVRlAqHdXwxRdfxPmUgIozByBzgrw2DgnXlQV4B2zVMvazkUceObqlINXvQHdX2lSfaR6pj4JmK+Creahq0Cpv0qU+2CarsQxzNOlOnAKX+ar1ZGOdn6rSsRWt2lEgP24n1vdOTFOBy366Wjz/VRo4Jijl/ljpDZB6YzAHlMtmdbRVzKCIS9kYnnjiibjYaUgGi6cah3aBH407IQxNbYZW/gtc5itfOraiVb4C+TFLvSpa5SuQH7Mn61W/dC7zi6I+JhfSgp6ecpIGNb2N19smyVCsPJRQFGhUoMBlfp3oycY6P1XFuSxataNAftxOrO+dmKbiXBbnMv+tKjGLAv1EgQKX+QVdOraiVb4C+TFLvSpa5SuQH7Mn61VxLivlYkseK3qtULYPZQlFgf6oQIHL/FLvycY6P1XFuSxataNAftxOrO+dmKbiXBbn8mdvlRXUFuu0Wi2e/xqWmEWB3qlAgcv8cisdW9EqX4H8mKVeFa3yFciP2ZP1qjiXlXIpcJlfSUvMvqtAgcv8su3Jxjo/VcW5LFq1o0B+3E6s752YpuJc5tep3qbVMLPOOutP7WXvv3s/FueyXdVK/L6mQIHL/BItHVvRKl+B/JilXhWt8hXIj1nq1aBr1RIubU5et8+lbWccp1eGxfMLoMTsewoUuMwv09JYF63yFciPWepV0SpfgfyYfalejfiL/w3bj/1+OPj98fIFaCNmM61awqXOs1UocNlGCZSofU6BApf5RdqXGuv8XHcvZtEqX7eiVdEqX4H8mH2pXnUkXM4222zxmL5moSdPQ8mvFiVmUaBnFChwma9zX2qs83PdvZhFq3zdilZFq3wF8mP2pXrVkXCpKGyEbushRzL++te/Dk6jcVa3U2v87ejEEooC/VGBApf5pd6XGuv8XHcvZtEqX7eiVdEqX4H8mH2pXnUkXALKU045JYw44og/K5Xvv/8+DDfccOHGG28Me+21V22JOevZsLl5m50UpAkUNJ6t3Ulp7G1pmXfeecO7774bXnvttW4lfaGFFgovvfRSPA2pN4UCl/ml1Zca6/xcdy9m0Spft6JV0SpfgfyYfaledSRc3nHHHWHUUUeNIKYjBRBrrrlmhM2ffvopgIrvvvuutsSuvPLK4M+ll16aX6I9EPO2224Lhx9+eMecyd0DWR7ij7jqqqvC3XffHY4//vjaZ1100UXhvPPOa6r59ddfHy6++OKO+xDpSrgCl10p9H+/70uNdX6uuxezaJWvW9GqaJWvQH7MvlSvOg4unat99dVXh48//jgsscQSsVSGH374cNddd8UV5P5ss8024cEHHxyoxEDnIYccErcr+tOf/hQWXnjhMMEEE4Rjjz023lPYeeedw8orrxzvY3jd9kZzzDFHOOywwyK8Pv/88/Favz/33HPD5JNPHj755JOwyy67hOeeey7+bMoppwxc1EMPPTT89a9/DWeffXaMP80008TzzHffffeY3gMOOCAsvvji4csvv4z33nfffcPrr78eTjjhhDDmmGPG+2600UZhpJFGCscdd1y8Jxdt+umnj+7r559/Hu8xyiijhO23336g/Ip31FFHxet++9vfRhdunXXWielrfMY777wTQLv7+/2rr74aQd2/33zzzbD66qvHNJ111llhvPHGCx999FHU4e233659M+hw0003RYhfd911w3zzzRc222yz+Iw33ngj5kH611prrfhhMPHEE4cdd9wxlsf5558fFl100dqyAIteMB8W8vD0009HfV2jLL755ptw5JFHBmDYCi532mmn+Nwff/wxlvfGG28clllmmZiXv//972G//faL9wCXt99+e/zbH+XZqAE3vE5nuxeceuqpYdxxx41luf7664f33nsvvyXpZswCl/nC9aXGOj/X3YtZtMrXrWhVtMpXID9mX6pXHQeXIAUMcvp22223AaUCgpZbbrkIKwcddFC45pprBiqx7bbbLsw///xh1VVXDbvuumuEFFBpiF3nzw0FEkADxILAeeaZJw6zg0rQsc8++8T/AySwZI6ne9kCCUxtscUW8f+eMfPMM4dFFlkkXHfddWGMMcYIW265Zdh///0jALn3FVdcEV21ccYZJ6yxxhrxZ4Bn2GGHjWAFnkAfUANQQOrggw+O/wY4fn7PPfeEM844I1xwwQUD5Xf22WePeQNFL7zwQnz2aqutFqcMND4D2D3yyCMxX6effnoEY1AJqk866aSoibSBQkC7xx57RFBeaaWVat8MYOaPe4F9IA/iPOPll18ORxxxRDjxxBMjfALfSSaZJKy33nphsskmi04h6K4ri0cffTSssMIKEYLl66mnnoqgCWCVBR1p7sOjFVwCdunzfHVA2R5zzDER1DfZZJN4PU2vvfbaCOQvvvhi2HzzzePHSaMGyqROZ/eZYoopIvgrS3mnZwo+JgB7Nch73YdRfvMToptv0VunhU5sGDsxTcqtE9PViWkqWrX3lndiGXZimkq9GvL1quPgcvTRR49gadh7qaWWiu6XwPkDAjprAFU3z+7kk0+OQLT11ltHAORgJrfKPbiLnLGvv/46giI3zHO4bf7v34ANHDq73DMAG+cKcHDy/IzrJYAJkAks7r333uiocSKlD8w4Bz09/6GHHgp77713eOCBB6JLaF4px/SVV16J7imYnHvuucMPP/wQ78Ph23PPPSMcAWZpagzS6tlzzTVX/JU0AMO6Z2ywwQYxve55yy23RMARVzoBLBh0Zjuo44By5cBzunfjs1vBpWdxeUHyBx98EL799tuB4FJeG8viww8/DHVwSR9aukaafCT4COlqWFw+zzzzzKBOLbvssmH55ZeP2eBOyreyAf7qjI8Mfyd3t6oBeK7TecIJJ4wfG9JsKoZrqwGo+uiohgsvvDDcd9997b3ZDbELXObLVzq2olW+AvkxS70qWuUrkB+zL9WrjoNLxQD6fv/738cSMTxuWBns/PKXv4zDtM3cNHPswAwnCVwCEW5cNXDOOFRzzjlnXJHOsfM8oJMCMLJSHZxwUcUz9Mm9AiWcPgFUAEWO2A033BBdPA4bJ+zxxx8Pk046aXTaBEABVNdee+3o3vm/34NG8HTaaadFeBKk271A0CyzzBLTUhfApeF0YCSAxAMPPLD2GQkuV1xxxQiQAJTTKw8JLrmVhsvT0O5XX33VdOFUFS65vTPNNNMA5zI9g+PKMZbH5FzKI03p31gWpjU8/PDDA5xL+ZdOdYBLCI7VBWDZDlzOOOOMUWvaC9J+8803R2A1NYCzyiXlMvsIaNRAOup0Vj+UNXA1ZA8cxUvBxwPHuxo45O0s6vJB409jKM5lXoPdlxrrvBx3P1bRKl+7olXRKl+B/Jh9qV51JFyCOcDV6JoZIt12220jYNQF7huIUUBVuDSkPtZYY8VOnZPFEeVSce/AwTnnnBPnQ3JMAdH7778f3TbAxIXkOLoHMAB+fmbo1rw+YGdYvBEu3U8exBt77LHjwhLD4lxPAMLpMi8TyJjPV4XLBKNg2pw+w+PtwGXdM8wHBGet4JJuFk/RjiO39NJLD3D7Gp9vvqohfW6nvNMrDYs3wiU9gZx5pHQxJA7668qCTqY9cGEBoOkP4NDWU9IE4JRxLlzSjjsMaEGgeuAeHGdON6g0VO4jQxoBZqMGHN46uKSXjwgfHVxKsOxjIwXw6d7VIK4PC64nqFfXOaoAlnO+4IILxg+oVivgi3PZPxvr/Fx3L2Zf6ti6p0D+VUWrolW+Avkx+1K96ki4TEVhGHvaaaeNjiUIawaVKb45meYWmv8HkMChOZrmzM0wwwxxzqZOnAsJhkAPyDBX0nxAAXQCBJu4Azsw4meglivKUTRMbk6j+XPAsBEuOWQAFoBxzLhjFpZw+ICV4Xr/NwwsLWDQcHVyLqUDmBoOBxscxHbgsu4ZIBpUNYNLK9m5jOZi0tsfLqi81QVAZE6sMgJF8tIIl7RyT3+DKsPTytB8SM5lXVmYL6qc6EM3cQxRe5YyM02C08g9Fg+gG7KuC4Cd88vp9uFhfqRyBZvgloPJ7Tb1wJC255iH2agBp7cOLoGhMjXNgl7KERB3FXxk2AbJvE954YanU6cA6Z133hlhtVkocNmVwv/3+77UWOfnunsxi1b5uhWtilb5CuTH7Ev1qiPhEnCBK04lJwmcpJXYIIjrVxeADlfIMOIzzzzTtEQ5YVzM6speoGAe5JNPPhlhMgVpaVwBPNVUU8XrP/300y5rDTdL3CoYm0/p/9w4cAHMqs90UzAEbM3h5JqCj8bAaX322Wdr05DzjGaJB+UJ5kGpdFTDF198EedTAirOHIDMCfLaOCRcVxbgHbBVt5vys5FHHjm6pSDV70B3V9pUn2lqhY+CZivgq3moatAqb9KlPvTkqVEFLnNq23/j9KXGOj/X3YtZtMrXrWhVtMpXID9mX6pXHQmX3CogYL6d4UGAY6ELmLHQhpvXLBiyNvyZnMj8Yu2cmOaActmsjraKmRbmVzYGpxa99dZbQzThVmY3Du0CPw5bJ4Shqc3Qyn+By3zl+1JjnZ/r7sUsWuXrVrQqWuUrkB+zL9WrjoNLq8ItruEuWRxjaNmKXtBoYYsha8OvraDKPZoNJecX89CLaZskQ7GAuoSiQKMCBS7z60Rfaqzzc929mEWrfN2KVkWrfAXyY/aletVxcGk40rY8FlrYJ/H++++P8mobjAAAFPJJREFUQ6TmKVrQYeGF+Y9czRKKAv1RgQKX+aXelxrr/Fx3L2bRKl+3olXRKl+B/Jh9qV51HFwqBqu4AaUFFrZcMY/SKm4LN9JikPziKjGLAn1LgQKX+eXZlxrr/Fx3L2bRKl+3olXRKl+B/Jh9qV51JFxa3Z2OfrRi2NxDp+VYTf2Pf/wj7nlYQlGgvypQ4DK/5PtSY52f6+7FLFrl61a0KlrlK5Afsy/Vq46ES0VhlbWFOVZE29TcPEtnXtvSpnFldX7RlZhFgd6vQIHL/DLsS411fq67F7Nola9b0apola9AfsxSrwZdq2FmnXXWn/JvU2IWBYoCSQEbzDee/FPUKQoUBYoCRYGiQH9R4KOPPooHnzSGApf9pQaUfA52BTrVuezEdHVimlSITkxXJ6apaNVe89GJZdiJaSr1qu/WqwKX7ZVtiV0UGKBAaazzK0PRqmiVr0B+zFKvilb5CuTHLPVq0LUqcJmvYYlZFPiZAqUByq8QRauiVb4C+TFLvSpa5SuQH7PUq0HXqsBlvoYlZlGgwGU360BprPOFK1oVrfIVyI9Z6lXRKl+B/JjN6lWBy3wNS8yiwM8U+NOf/hROP/30jlOlE9PViWlScJ2Yrk5MU9Gqvde8E8uwE9NU6lXfrVcFLtsr2xK7KFAUKAoUBYoCRYGiQFGghQIFLkv1KAoUBYoCRYGiQFGgKFAUGGwKFLgcbFKWG/VXBX7xi1+E0UYbLXzyySf9QgJ5FT777LOf5fc3v/lN+PDDD5tqMMYYY8RrGg9fqLvO8bJOBfvqq696vabyPdxww4V///vfA/JStBq4WH/1q1+FcccdN7z11lulXnVR670fwn/+85+iVROtxhxzzPDpp5/+rL2pa4Ny2u92rmvWzg2Jhuy3v/3tz9oVzxh//PHjz3744YcBj8zJY12b1Oy6nDwWuBwSJV7u2W8U2HvvvcNSSy0VIciLuNFGG4U33nijT+Z/+OGHD1dccUUEaY32MMMME9Zcc80w33zzBTp8+eWXYeSRRw4HHnhg+Pvf/z5Ag9/97nfh3HPPjY38iCOOGH8nDt0ar3MS2Pnnnx8bSOHdd9+Nz+itp4HJ78033xyPy91mm21q81y0CuGEE04IM800U/z4AOKrrLJKmH/++Uu9qmlJLrnkkgAuf/nLX8b3cJ111onHNJd38L9iOVVQm+R9W3311cNrr70WmrVBXbXf7Vynjapr54ZEZzDbbLOFZZZZJm5ePs8888RHTD/99OHkk08O33zzTWwv5XvLLbeM9aJVH1XXDmuT6q5rJ48FLodEyZd79gsFOC0PPPBA+OMf/xi/FDX6TivYeuut+2T+V1tttZi3BRZYIObvzjvvDNddd11YfvnlIxBqWLfddtuw4oorhkUWWWSABsBBZwi8J5988nDZZZeFRRddNFxzzTUDXXfiiSeGHXbYIYKF8OCDD4YjjzwyXHXVVb1SU7pMMMEE8ehcnd0dd9xRtGooydlnnz3CpU5S56WeWYF65plnFq1qtAIQc889d9D+3HPPPfHj67TTTita/f9aaYPmnHPOMPXUU8e6BLLq2qCll1463HjjjS3b73auM9JS1841jvAMjoZMuiaZZJIw1lhjDYBL9cKH2aabbho/8u+6666w/vrrh/POO69lHuvaJMBZ17e1k8cCl4OjpMs9+qUCM888czj11FPDXHPNFfN/+OGHhymmmCKsvPLKfVIPnZlG6+OPPw4zzjhjOOuss8KGG24Y/waOzz77bHRQuJJzzDHHAA2uvvrqCIlHHHFEdHcfeeSR2ADSrvE6wPmHP/whONtX4Prdfffd4eCDD+51mm611VZRB50LlxdcPvzww0WrhpJcb7314qp9dYMbd//998cPjKJVfZX3Pvz4449RKy6Vd65oNbBWPlASXNa1QYcddljYaaedWrbf7VyXPoYb27knnnhiiLRdPuC1tcm5HHXUUePHmREkjqX3yt+gs1UfVVd3vI91fVs7eSxwOUSKvdy0Pyjg5T7ooIOiiyDsvvvuwXBFX4XLVKYa5DXWWCM4W33HHXcMjz76aFhrrbXCyy+/HGaZZZbYKHEOUjDEAhKPO+64+KMEl5ypxutuv/32OKzl/oLGXfxDDz20V1UpgHz88cfHoasDDjhgAFwWrQYuxj333DO63UcddVT44osv4nCcj4+6+tHf69W0004bnajXX389wqVh2+TAlXfw53WrCpd1bdApp5wSP2patd/tXGfaUF079+STTw6RtqsRLj3ENJxjjjkmfqD7+7333uuyj6prk/7yl7/UXtdOHgtcDpFiLzftDwr4UjSkwJ3yxahzFMBXXw0nnXRSBMj99tsvNqQCJ+WQQw4JN910Uxx+2X777aObkgLY9DW98847xw6RiwkSDKs3XnfOOedENzRdf8stt4Rjjz32Z3M4e4O23NwZZpghfPvtt+HXv/51TDJ9FlxwwaJVQwFyVwztpqkQ3OvHH388LLbYYkWrBq2A+MILLxynlQiGxU0z4dCVd7A5XNa1QeqXj9lW7Xc71xkSr2vnhtR88Ua4NKrkY5yrbWqAj/2cPqqu/V511VVr+7Z28ljgsjf0VCWNHauAeSmgkot35ZVXxiFizkJfDCbJH3300WHZZZf92QrFCy+8MC4sAJXmGH799ddh4403jnMxNVwcFcPcGkNxOLvmbdZdt++++8YGcoUVVogrh80l4/5VV1r3Bm05StIvGB4H1bvttlsEgKLVz0uQy+IwAh2aHRdAOEecbkWrn2u13HLLBa7S4osvHr7//vsIl+rVJptsUrRqaBiqzuUWW2xR2wbVtd/XX399HEbmWrZzHdewrp0bUu1VI1zuv//+Md3VD3vP7iqPzdrvuuvayWOByyFV8uW+/UIBjY95g8L7778fgWpIfakObUHT8GU1HX/961/D3/72tzgULvhqBgnvvPNOXJSx3Xbbhaeffjou/PHVa+6hRlADDirqrrMwaLrppotxrR7XefbmAMhNtDfnslme+7tW5itzdS0YeP7556N7XbSqr/XeN1vQ0Mq2TYCmaDWwVt4pbZHdOzh4dW1QXfttMYuRGQvN2rmOc1j3jCHVdjXCJcd/ookm+tnjjArMOuusA/VR1Tw2qzt12rSTxwKXQ6rky337jQKjjz56GHvsseMwRH8NFvtMNdVUcVV0gmsrvDfbbLMBe18CxldffTUuQkih7jq/4/yJ19scy5zyL1rVq6QjH3bYYX+2X2zRql4rrrj3rPp+FK26fvvq2qDG9tv/zz777J/Nnc+5Lj29Lm7XKRuyMbrKY7N2uFnflpPHApdDtkzL3YsC/VYBi3IuvfTSfpv/djJetMpXq2hVtMpXoP2YtktjFjz00EPtX9xLruiJPBa47CWVoSSzKFAUKAoUBYoCRYGiQG9QoMBlbyilksaiQFGgKFAUKAoUBYoCvUSBApe9pKBKMosCRYGiQFGgKFAUKAr0BgUKXPaGUippLAoUBYoCRYGiQFGgKNBLFChw2UsKqiSzKFAU6F0KOM7QcaC2Qvnuu+86KvH23bStyFdffTXU02aXAfrQqYSeV8CJWDb/rtuZwSriKaecMu6E0Wl1uOeVKk9sR4ECl+2oVeIWBYoCRYEMBWysb9/GFN5+++2wwQYbxHPGOyE49s4fG/6fcMIJQyVJNtPfZZdd4gbzgqMfr7jiingW8tAKCy20UDztxklUzz77bFYybAtk78mXXnopHhiQgpOszjjjjHDffffFwwO6ExzNZ7/XarCH7Prrrx83TW8W7L1rj0OHOjjmsFkAlpdffnk48cQTg/1lG4NNs52k5SQym7eXUBTIVaDAZa5SJV5RoChQFMhQYOutt44gaUP5Z555JjgPmgNkw+tOOXd+3nnnjae6gMu77rorI1eDN8rvf//7eJqTYMuXzz//PJ46A6RWWmmlAMaHRgC74PLggw+OJ0XlhASX3D3HWKaQTh5y0onj+LoTwGXaWB6Eg0Eb8jv5qtUxs07Isgm2/RpbwbqtwtxznnnmCT/88ENtEh10YCNupwM5q7qEokCOAgUuc1QqcYoCRYGiQKYCjgIdYYQRghONHGUICpyRDjCdqmGo3HnpY4wxRgQHDhkQGH/88SPscabGG2+8GN8JSHPPPXc8ShJwgZd11103wiunDLhyQ7lT4k4//fTxOFL3dq4512nvvfcOxx9/fJh55pnjaUkzzjhjdCudGOQ6gAFAZppppgh3r732WlhnnXXiJt2rrLJKdN3kx3GDTiA59NBDI4C5XlzDpsDk4osvjicugRXHXE466aQ/S0NVPmAJMKuQxI1bffXVYz7cxwkpnu3+jhQFfM6z9/vk5jmphl6cOk6edLmPfQo/+OCDqKtw0UUXRb2kTZnccMMNA+nkhBM6gDcb+Hu259SlAYwddNBB8dQpHw2TTDJJHDqug8uPP/44DD/88LE8nRrj44PTadN4R/V5lnwJSy655M9qGbj0+3TuOmfU0Zivv/562GOPPaIzqb7tvvvuYfPNNw9rr712OPDAA8OEE044AC4drfnnP/85LL300vGcewcdONxA2m+99db4AaQ+NSs3HyLHHXdcPA5RXSqhKJCjQIHLHJVKnKJAUaAokKGAeZaPPPJIhMY555yz9ihQR1+CR526v8caa6wIZqAHdLkWtNnoWPjoo4+C4UmAB34AlWPtBGA1wQQTxOc4V/iyyy6LUAfaZphhhjDmmGNG2Nprr70i1Ar/+te/ItykYXEQ6t9ARvq5VIATAHPdhMceeyzCJ0ACt4aPuWPJVQO5nFrQdc4558TjO90DTEuvuE8++eQABQ2zgiuOnmeIB4gFsCYtwBgAPfzwwxE0nd4DxoChdIA2QR7lG2iBePNIaTvHHHNE8AaY9BVAMNDmjjbqtPPOO8f0OB3K9eDNuct1aZA35SatwF96msGl5wLd3/zmNxFWPQcES5+jQX1cSDvwd+xlNdBB2QI7+Qd6o402Wqwnyth1dKVvOp7VUZqjjDLKALikk2f6OPGBACJpns4lV87y6cOmWbkpA/WGbiUUBXIUKHCZo1KJUxQoChQFMhTgUJljB2LAXmMAQgDIOfTLLLNMBCygBSB32GGHCA2cpfXWWy/GEx+wATqQZKgWbIBLcYESQFlggQXime0AwLUAZOqpp46uqd878x5ccts4dNU5l9IIQN333XffjW7WmWeeGVZcccUIJTfeeGOEUy6meXfSyzkDNOZIHnbYYeHKK6+M7t0KK6wQjjjiiHgUqAUgb775ZrjgggsiOKdjQT0vubvzzTdfdOaADoAW3BsocRuBIseRewnywLXhfKAF3jm15go6jxwAc1xpKx/gWrytttoqzqEEWcsuu2xM12yzzVarE8cvDYuDvro0gGcQCNbAlrLZbrvtmsLliy++GB3FP/7xj7GMTAMAclzjF154IX5I+J0hedo2wmXjnEuASGcfJjlwqW4oe8CsDDjXnGDg6necWXUWgDcrN+UDoOvqdMZrUaL0QwUKXPbDQi9ZLgoUBYacAuAB1IGgf/zjH/FBoBB4GkrmsqX5lzrs+++/P67WBUGAMTlRoM7wLhAybGk41Xnt3EJwmebTGS7m6BmKBo0cNUAD1iaaaKKfweVSSy0Vz3qvwuVpp50WIZIbCWYT/LmHPABHQ+Ge4VmcWS4buAR70sHJAzCgB/yAUU5nAsb99tsvcGxToAeHkMtmQQl4NKfPsC+4dD8gCy45ginQRz65pIaKXXfJJZdEuORSgh9gWV35bIgdyD311FNx+FwwDF2nUxUuQV1dGsxRBZNpAU+aW9nMuUzDzhZ4mbIA6nwoADbD1NJt5b6PiMZQHRanpQVCPhroAESBdbo/XcxbbXQu1RUfMeLRTQDawBtcJqjlMDcrNx8DpgsUuBxy7UZfu3OBy75WoiU/RYGiwFBVIHXyIEunzCkyJApKQNxtt90WnUWgyCnye8OegKoduAReIM08RTBryBSoeq7hXZABKgx7cuzAH/j45JNPfgaXhrQN0XLkOJ+GgwEat+3222+P8ya5WsARkHG6uILN4JJTaQifSwpeDHk3ztdbbLHFIrACZQBuviRYlA9w6Rnmd8rLkUceGf7nf/4nDv+DQ65fHVzSFbxLuzQAY04dLaQ/QbvKAdrqdDLvlXPJvaWH6+rSAKiBIReY+zrOOOM0dS6l4dprr41aqAfmLdKDi8mxTDDPuW4Fl35nBTgN5HXfffeNjqP0+RBxL/o1wqVpDkBfXB8MAB5oK1tzNWllTiVYrSs3z/FB0UkL0obqC14enqXA/9fOHdw2CARRGG7BHbgtGnAPvrsAGuToFnyPPqSREFpg7UMclDfHeL0MP0h+eTM7EZddmLIoBEIgBPoIcCOVYY2iqeCkES0cSiLAD7b+RuGk9DAMs3DzQ18iiADhJnEula0JJy6i4EYpJ3NDhRI6oaBMXg6YfYlLIuJ6vTbFJeeLkFJerb3sS2Dq6eNkEZUVVbLnvC3FJfeQi2gth46zxxkTXDLicD2GyR6EUpV9rcOE+LXeARVl7sqL0+cwi37BEpf6UjnBnEvlaffPXbMP4TqO43yIZi0utzgRgT6T0/1+n5m1cqhRP/X8cN4aRaQM750QhK9SurYJzm2dSOdMcxZb4lIJmzAVpg14zlWSrz5Jn71er7l1gLjkhNYoIq0L3GnvgJAP55pYJCjLSSU6W8+Nm+s6DlIRxIkQ6CEQcdlDKWtCIARC4E0CfuA5Rg7drAeEc5i4fwSXnrt3gughLh+PxyycOJgEWQX3jYBQ/n4nOKgcRKJjGe5Db+M0Td17uj+HcIi+vXmMRGAxag3xlodrP5/P3X2W+bq27xDCe4O/tzgRs9oD5FPjeVo5cJ+5qa5zFBha712o8I+Dw0nme3J3Pw25EtLLd6C11+VymScS6POs/le9sIQx99XfWs+t2i6Id88zEQI9BCIueyhlTQiEQAj8EQJLcal0nDgfgeoVlTlXUS/lN8KYK2Opqnd2nQOhzcFWRr/dbt9IMdc8KYGIy5M+uKQdAiHwPwk40KH061BKhlqf8x3gFmp1MG6pVQ7/zbviMGvXaLmSddBIvyaHPBECvQQiLntJZV0IhEAIhEAIhEAIhMAhgYjLQ0RZEAIhEAIhEAIhEAIh0Esg4rKXVNaFQAiEQAiEQAiEQAgcEvgBYyeGBlE6KAMAAAAASUVORK5CYII=", - "text/plain": [ - "\n", - "\n", - "If you see this message, it means the renderer has not been properly enabled\n", - "for the frontend that you are using. For more information, see\n", - "https://altair-viz.github.io/user_guide/troubleshooting.html\n" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.cumulative_num_comparisons_from_blocking_rules_chart(\n", - " list(blocking_rules.values())\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "lead_generation_experiments", - "language": "python", - "name": "lead_generation_experiments" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file diff --git a/notebooks/models/splink/WL_splink-tests.ipynb b/notebooks/models/splink/WL_splink-tests.ipynb deleted file mode 100644 index fbcb94b..0000000 --- a/notebooks/models/splink/WL_splink-tests.ipynb +++ /dev/null @@ -1,3033 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "776b2be3-a092-481c-bb3b-2b659affd578", - "metadata": {}, - "source": [ - "# Splink tests\n", - "\n", - "Bringing Sarah's code in leads to some inevitable bugs with our versions of packages, and I need a place to work out how some of these functions work." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "5661e8cc-24a8-4025-96f1-02faab428c81", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "InteractiveShell.ast_node_interactivity = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "82660568-9ccb-447e-b6d5-1be5791f5197", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "RendererRegistry.enable('mimetype')" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import duckdb\n", - "import pandas as pd\n", - "import random\n", - "import datetime\n", - "\n", - "import altair as alt\n", - "alt.renderers.enable(\"mimetype\")\n", - "\n", - "from cmf.models import model_train as ld\n", - "from cmf.features.clean_complex import clean_comp_names\n", - "from cmf.config import stopwords\n", - "from cmf.config import settings\n", - "from cmf.features.clean_basic import (\n", - " remove_notnumbers_leadingzeroes,\n", - " clean_company_name,\n", - " array_except,\n", - " array_intersect,\n", - " list_join_to_string,\n", - ")\n", - "\n", - "from splink.duckdb.linker import DuckDBLinker\n", - "from splink.charts import save_offline_chart" - ] - }, - { - "cell_type": "markdown", - "id": "36e23c6e-d12f-45da-bfe2-b5d7cd79151b", - "metadata": {}, - "source": [ - "## From Sarah's tests\n", - "\n", - "I'm just aiming to replicate her code so I can explore it a bit and see why it's not running for me." - ] - }, - { - "cell_type": "markdown", - "id": "538bbb99-ae70-4076-a9c4-81cdbb316266", - "metadata": {}, - "source": [ - "Sarah's comments:\n", - "\n", - "Read in Companies House data, return `company_number`, `postcodes` and `company_name` split into: 'unusual' tokens, most common 3 tokens and most common 4 to 6 tokens." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "5e4021a2-409a-4f3a-8ab5-852d567a4097", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/envs/lead_generation_experiments/lib/python3.9/site-packages/pandas/io/sql.py:1410: RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to \"sqlalchemy<2.0\". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n", - " meta = MetaData(self.connectable, schema=schema)\n" - ] - } - ], - "source": [ - "df_ch = ld.comp_house_read()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cab2f9ad-5c3d-464d-ba5e-e3b09e7630d0", - "metadata": {}, - "outputs": [], - "source": [ - "df_ch_clean = ld.clean_numbers_and_names(df_ch)\n", - "df_ch_clean.reset_index(inplace=True)\n", - "df_ch_clean.rename(columns={\"index\": \"unique_id\"}, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "11bafe83-0374-4e73-ae3a-07904d98251e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
unique_idcomp_num_cleanname_unusual_tokenssecondary_name_unusual_tokensnames_tokens_stopwordspostcodepostcode_altname_unusual_tokens_first5name_unusual_tokens_last5postcode_area
count5.350528e+06535052853504545350519535052853505280.0535045453504545265568
uniqueNaN51607615191154506352747947538NaN601612576680488
topNaN1propertylimitedNaNconsuvicesN
freqNaN22294840406441678481701NaN120371177742176447
mean2.675264e+06NaNNaNNaNNaNNaNNaNNaNNaNNaN
std1.544565e+06NaNNaNNaNNaNNaNNaNNaNNaNNaN
min0.000000e+00NaNNaNNaNNaNNaNNaNNaNNaNNaN
25%1.337632e+06NaNNaNNaNNaNNaNNaNNaNNaNNaN
50%2.675264e+06NaNNaNNaNNaNNaNNaNNaNNaNNaN
75%4.012895e+06NaNNaNNaNNaNNaNNaNNaNNaNNaN
max5.350527e+06NaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", - "
" - ], - "text/plain": [ - " unique_id comp_num_clean name_unusual_tokens \\\n", - "count 5.350528e+06 5350528 5350454 \n", - "unique NaN 5160761 5191154 \n", - "top NaN 1 property \n", - "freq NaN 22 29 \n", - "mean 2.675264e+06 NaN NaN \n", - "std 1.544565e+06 NaN NaN \n", - "min 0.000000e+00 NaN NaN \n", - "25% 1.337632e+06 NaN NaN \n", - "50% 2.675264e+06 NaN NaN \n", - "75% 4.012895e+06 NaN NaN \n", - "max 5.350527e+06 NaN NaN \n", - "\n", - " secondary_name_unusual_tokens names_tokens_stopwords postcode \\\n", - "count 5350519 5350528 5350528 \n", - "unique 506352 747 947538 \n", - "top limited \n", - "freq 4840406 4416784 81701 \n", - "mean NaN NaN NaN \n", - "std NaN NaN NaN \n", - "min NaN NaN NaN \n", - "25% NaN NaN NaN \n", - "50% NaN NaN NaN \n", - "75% NaN NaN NaN \n", - "max NaN NaN NaN \n", - "\n", - " postcode_alt name_unusual_tokens_first5 name_unusual_tokens_last5 \\\n", - "count 0.0 5350454 5350454 \n", - "unique NaN 601612 576680 \n", - "top NaN consu vices \n", - "freq NaN 120371 177742 \n", - "mean NaN NaN NaN \n", - "std NaN NaN NaN \n", - "min NaN NaN NaN \n", - "25% NaN NaN NaN \n", - "50% NaN NaN NaN \n", - "75% NaN NaN NaN \n", - "max NaN NaN NaN \n", - "\n", - " postcode_area \n", - "count 5265568 \n", - "unique 488 \n", - "top N \n", - "freq 176447 \n", - "mean NaN \n", - "std NaN \n", - "min NaN \n", - "25% NaN \n", - "50% NaN \n", - "75% NaN \n", - "max NaN " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_ch_clean.describe(include='all')" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "ac5a4047-2765-4499-b5cd-1320c5cd793e", - "metadata": {}, - "outputs": [], - "source": [ - "df_dh = ld.data_hub_read()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "94bb131e-e3f7-4f79-b21f-a526f93a895f", - "metadata": {}, - "outputs": [], - "source": [ - "df_dh_clean = ld.clean_numbers_and_names(df_dh)\n", - "df_dh_clean.reset_index(inplace=True)\n", - "df_dh_clean.rename(columns={\"index\": \"unique_id\"}, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "4470d6f1-dbcd-44f4-af49-4756bb323c2e", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
unique_idcomp_num_cleanname_unusual_tokenssecondary_name_unusual_tokensnames_tokens_stopwordspostcodepostcode_altname_unusual_tokens_first5name_unusual_tokens_last5postcode_area
count176050.000000907421760461760481760501760500.0176046176046115526
uniqueNaN897571685173347641290830NaN5477753102271
topNaNbarclayslimitedNaNconsutionsEC
freqNaN143341402448127812088NaN209933195251
mean88024.500000NaNNaNNaNNaNNaNNaNNaNNaNNaN
std50821.401783NaNNaNNaNNaNNaNNaNNaNNaNNaN
min0.000000NaNNaNNaNNaNNaNNaNNaNNaNNaN
25%44012.250000NaNNaNNaNNaNNaNNaNNaNNaNNaN
50%88024.500000NaNNaNNaNNaNNaNNaNNaNNaNNaN
75%132036.750000NaNNaNNaNNaNNaNNaNNaNNaNNaN
max176049.000000NaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", - "
" - ], - "text/plain": [ - " unique_id comp_num_clean name_unusual_tokens \\\n", - "count 176050.000000 90742 176046 \n", - "unique NaN 89757 168517 \n", - "top NaN barclays \n", - "freq NaN 143 34 \n", - "mean 88024.500000 NaN NaN \n", - "std 50821.401783 NaN NaN \n", - "min 0.000000 NaN NaN \n", - "25% 44012.250000 NaN NaN \n", - "50% 88024.500000 NaN NaN \n", - "75% 132036.750000 NaN NaN \n", - "max 176049.000000 NaN NaN \n", - "\n", - " secondary_name_unusual_tokens names_tokens_stopwords postcode \\\n", - "count 176048 176050 176050 \n", - "unique 33476 412 90830 \n", - "top limited \n", - "freq 140244 81278 12088 \n", - "mean NaN NaN NaN \n", - "std NaN NaN NaN \n", - "min NaN NaN NaN \n", - "25% NaN NaN NaN \n", - "50% NaN NaN NaN \n", - "75% NaN NaN NaN \n", - "max NaN NaN NaN \n", - "\n", - " postcode_alt name_unusual_tokens_first5 name_unusual_tokens_last5 \\\n", - "count 0.0 176046 176046 \n", - "unique NaN 54777 53102 \n", - "top NaN consu tions \n", - "freq NaN 2099 3319 \n", - "mean NaN NaN NaN \n", - "std NaN NaN NaN \n", - "min NaN NaN NaN \n", - "25% NaN NaN NaN \n", - "50% NaN NaN NaN \n", - "75% NaN NaN NaN \n", - "max NaN NaN NaN \n", - "\n", - " postcode_area \n", - "count 115526 \n", - "unique 271 \n", - "top EC \n", - "freq 5251 \n", - "mean NaN \n", - "std NaN \n", - "min NaN \n", - "25% NaN \n", - "50% NaN \n", - "75% NaN \n", - "max NaN " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_dh_clean.describe(include='all')" - ] - }, - { - "cell_type": "markdown", - "id": "ff5b94ee-cd68-472d-a431-11af8d8cf630", - "metadata": {}, - "source": [ - "Sarah's comments:\n", - "\n", - "Instantiate the linker" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3a2b9d75-0896-41dd-814e-2f87d97ff9b2", - "metadata": {}, - "outputs": [], - "source": [ - "linker = DuckDBLinker(\n", - " [df_dh_clean, df_ch_clean],\n", - " settings,\n", - " input_table_aliases=[\"datahub\", \"companies_house\"],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "32cdee89-1572-419a-ade0-464e3a10b3b0", - "metadata": {}, - "source": [ - "Sarah's comments:\n", - "\n", - "This is how you do a deterministic link. It uses whatever rules you specify in 'blocking_rules_to_generate_predictions'\n", - "\n", - "`linker.deterministic_link().as_pandas_dataframe()`\n", - "\n", - "Determine probability two random records match i.e. the prior. Should admit very few (none if possible) false positives. [Linker docs](https://moj-analytical-services.github.io/splink/linkerest.html#splink.linker).\n", - "\n", - "`Linker.estimate_probability_two_random_records_match`\n", - "\n", - "This assumption is important to what we think 'a company is'. If just using equality on name, we - for instance - think astrazeneca cambridge and astrazeneca macclesfield are 'the same' comp may need revisiting / alternative models building." - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "2feac536-edf5-49b1-af99-b7f76993e42b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "10798 coop genossenschaft gruppe\n", - "169844 mango scripts\n", - "109134 qnap systems\n", - "69510 cypfer\n", - "88599 architects bradley steffian\n", - "Name: name_unusual_tokens, dtype: object" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "4428412 dark grimsby kitchen\n", - "2217747 ostereo publishing\n", - "5139079 distribution symmetry\n", - "2203595 bovingdon court orchard\n", - "832055 bargains nifty\n", - "Name: name_unusual_tokens, dtype: object" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_dh_clean.name_unusual_tokens.sample(5)\n", - "df_ch_clean.name_unusual_tokens.sample(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "f5aaea87-86e8-4040-af8f-43791ebcb742", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Probability two random records match is estimated to be 1.89e-07.\n", - "This means that amongst all possible pairwise record comparisons, one in 5,302,807.68 are expected to match. With 941,960,454,400 total possible comparisons, we expect a total of around 177,634.29 matching pairs\n" - ] - } - ], - "source": [ - "linker.estimate_probability_two_random_records_match(\n", - " \"l.name_unusual_tokens = r.name_unusual_tokens\",\n", - " recall=0.7,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "f8d88c07-f9f2-4161-a366-39f99d311744", - "metadata": {}, - "source": [ - "Sarah's comments:\n", - "\n", - "But let's do probabilistic linkage instead. Increased `max_pairs` so that the model more likely to encounter the required comparison levels.\n", - "\n", - "NOTE: random sampling and can't set seed anymore" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "24069b5a-f428-4c53-a9ae-805cc18cdb0b", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "----- Estimating u probabilities using random sampling -----\n", - "u probability not trained for comp_num_clean - Exact match (comparison vector value: 2). This usually means the comparison level was never observed in the training data.\n", - "u probability not trained for postcode_area - All other comparisons (comparison vector value: 0). This usually means the comparison level was never observed in the training data.\n", - "\n", - "Estimated u probabilities using random sampling\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - comp_num_clean (some m values are not trained).\n", - " - postcode_area (some u values are not trained, some m values are not trained).\n" - ] - } - ], - "source": [ - "linker.estimate_u_using_random_sampling(max_pairs=1e7)" - ] - }, - { - "cell_type": "markdown", - "id": "b15cc4be-a5a3-40f5-8a0d-969f2d937c47", - "metadata": {}, - "source": [ - "That warning is because we've got a bunch of values in Companies House that aren't in Data Hub, and vice versa.\n", - "\n", - "Should we clean these? Could provide weird signal." - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "eab1ded7-e771-4fb7-89d4-7102cfd25e1b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['KIGALI', 'BFPO', 'BHI', 'BJL', 'BROOKHILL']" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "random.sample(\n", - " list(set(df_dh_clean.postcode_area).difference(df_ch_clean.postcode_area)),\n", - " 5\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "5305b102-9ccf-4b1d-9de9-967ca16c484d", - "metadata": {}, - "source": [ - "Sarah's comments:\n", - "\n", - "If we can treat company number as a partially-completed label we can estimate the m values from the numbers." - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "f55f4591-c408-40ee-99da-ee6e1231889c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "133969 NaN\n", - "165627 9307691\n", - "152270 NaN\n", - "89588 4917626\n", - "25141 NaN\n", - "Name: comp_num_clean, dtype: object" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "4349781 1931158\n", - "4718797 5505973\n", - "17037 14224009\n", - "1971377 6070287\n", - "3807590 13628591\n", - "Name: comp_num_clean, dtype: object" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_dh_clean.comp_num_clean.sample(5)\n", - "df_ch_clean.comp_num_clean.sample(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "ef618a05-82d4-4223-8318-a1b6d6e73539", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['1370190', '11095559', '7277274', '10604962', '7109059']" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "random.sample(\n", - " list(set(df_dh_clean.comp_num_clean).intersection(df_ch_clean.comp_num_clean)),\n", - " 5\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "2ef55089-05cd-48d5-ac50-f1d3520c560d", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "---- Estimating m probabilities using from column comp_num_clean -----\n", - "m probability not trained for comp_num_clean - Jaro_winkler_similarity >= 0.75 (comparison vector value: 1). This usually means the comparison level was never observed in the training data.\n", - "m probability not trained for comp_num_clean - All other comparisons (comparison vector value: 0). This usually means the comparison level was never observed in the training data.\n", - "m probability not trained for postcode_area - All other comparisons (comparison vector value: 0). This usually means the comparison level was never observed in the training data.\n", - "\n", - "Your model is not yet fully trained. Missing estimates for:\n", - " - comp_num_clean (some m values are not trained).\n", - " - postcode_area (some u values are not trained, some m values are not trained).\n" - ] - } - ], - "source": [ - "linker.estimate_m_from_label_column(\"comp_num_clean\")" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "55b968d3-eab5-44b8-a8c6-07573e059b38", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.vegalite.v4+json": { - "$schema": "https://vega.github.io/schema/vega-lite/v5.2.json", - "config": { - "header": { - "title": null - }, - "mark": { - "tooltip": null - }, - "title": { - "anchor": "middle" - }, - "view": { - "height": 60, - "width": 400 - } - }, - "data": { - "values": [ - { - "bayes_factor": 1.8857938078260088e-7, - "bayes_factor_description": "The probability that two random records drawn at random match is 0.000 or one in 5,302,807.7 records.This is equivalent to a starting match weight of -22.338.", - "comparison_name": "probability_two_random_records_match", - "comparison_sort_order": -1, - "comparison_vector_value": 0, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "", - "log2_bayes_factor": -22.33832472345254, - "m_probability": null, - "m_probability_description": null, - "max_comparison_vector_value": 0, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": null, - "tf_adjustment_column": null, - "tf_adjustment_weight": null, - "u_probability": null, - "u_probability_description": null - }, - { - "bayes_factor": 4948061.444887786, - "bayes_factor_description": "If comparison level is `exact match` then comparison is 4,948,061.44 times more likely to be a match", - "comparison_name": "comp_num_clean", - "comparison_sort_order": 0, - "comparison_vector_value": 2, - "has_tf_adjustments": true, - "is_null_level": false, - "label_for_charts": "Exact match", - "log2_bayes_factor": 22.238431985096113, - "m_probability": 1, - "m_probability_description": "Amongst matching record comparisons, 100.00% of records are in the exact match comparison level", - "max_comparison_vector_value": 2, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "\"comp_num_clean_l\" = \"comp_num_clean_r\"", - "tf_adjustment_column": "comp_num_clean", - "tf_adjustment_weight": 1, - "u_probability": 2.020993496419037e-7, - "u_probability_description": "Amongst non-matching record comparisons, 0.00% of records are in the exact match comparison level" - }, - { - "bayes_factor": 1.3654800037318084, - "bayes_factor_description": "If comparison level is `jaro_winkler_similarity >= 0.75` then comparison is 1.37 times more likely to be a match", - "comparison_name": "comp_num_clean", - "comparison_sort_order": 0, - "comparison_vector_value": 1, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Jaro_winkler_similarity >= 0.75", - "log2_bayes_factor": 0.4494081872425724, - "m_probability": 0.025000000000000022, - "m_probability_description": "Amongst matching record comparisons, 2.50% of records are in the jaro_winkler_similarity >= 0.75 comparison level", - "max_comparison_vector_value": 2, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "jaro_winkler_similarity(\"comp_num_clean_l\", \"comp_num_clean_r\") >= 0.75", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.01830858008295684, - "u_probability_description": "Amongst non-matching record comparisons, 1.83% of records are in the jaro_winkler_similarity >= 0.75 comparison level" - }, - { - "bayes_factor": 0.02546625581726258, - "bayes_factor_description": "If comparison level is `all other comparisons` then comparison is 39.27 times less likely to be a match", - "comparison_name": "comp_num_clean", - "comparison_sort_order": 0, - "comparison_vector_value": 0, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "All other comparisons", - "log2_bayes_factor": -5.295269327176444, - "m_probability": 0.025000000000000022, - "m_probability_description": "Amongst matching record comparisons, 2.50% of records are in the all other comparisons comparison level", - "max_comparison_vector_value": 2, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "ELSE", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.9816912301278894, - "u_probability_description": "Amongst non-matching record comparisons, 98.17% of records are in the all other comparisons comparison level" - }, - { - "bayes_factor": 9073085.184755592, - "bayes_factor_description": "If comparison level is `exact match` then comparison is 9,073,085.18 times more likely to be a match", - "comparison_name": "name_unusual_tokens", - "comparison_sort_order": 1, - "comparison_vector_value": 3, - "has_tf_adjustments": true, - "is_null_level": false, - "label_for_charts": "Exact match", - "log2_bayes_factor": 23.11316177321052, - "m_probability": 0.9126429163214581, - "m_probability_description": "Amongst matching record comparisons, 91.26% of records are in the exact match comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "\"name_unusual_tokens_l\" = \"name_unusual_tokens_r\"", - "tf_adjustment_column": "name_unusual_tokens", - "tf_adjustment_weight": 1, - "u_probability": 1.005879364887769e-7, - "u_probability_description": "Amongst non-matching record comparisons, 0.00% of records are in the exact match comparison level" - }, - { - "bayes_factor": 2.525226260074343, - "bayes_factor_description": "If comparison level is `jaro_winkler_similarity >= 0.8` then comparison is 2.53 times more likely to be a match", - "comparison_name": "name_unusual_tokens", - "comparison_sort_order": 1, - "comparison_vector_value": 2, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Jaro_winkler_similarity >= 0.8", - "log2_bayes_factor": 1.3364126590180854, - "m_probability": 0.0018668876001104668, - "m_probability_description": "Amongst matching record comparisons, 0.19% of records are in the jaro_winkler_similarity >= 0.8 comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "jaro_winkler_similarity(\"name_unusual_tokens_l\", \"name_unusual_tokens_r\") >= 0.8", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.0007392951790607094, - "u_probability_description": "Amongst non-matching record comparisons, 0.07% of records are in the jaro_winkler_similarity >= 0.8 comparison level" - }, - { - "bayes_factor": 0.12707702474714255, - "bayes_factor_description": "If comparison level is `jaro_winkler_similarity >= 0.6` then comparison is 7.87 times less likely to be a match", - "comparison_name": "name_unusual_tokens", - "comparison_sort_order": 1, - "comparison_vector_value": 1, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Jaro_winkler_similarity >= 0.6", - "log2_bayes_factor": -2.976224877129608, - "m_probability": 0.011411212372272854, - "m_probability_description": "Amongst matching record comparisons, 1.14% of records are in the jaro_winkler_similarity >= 0.6 comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "jaro_winkler_similarity(\"name_unusual_tokens_l\", \"name_unusual_tokens_r\") >= 0.6", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.08979760420877689, - "u_probability_description": "Amongst non-matching record comparisons, 8.98% of records are in the jaro_winkler_similarity >= 0.6 comparison level" - }, - { - "bayes_factor": 0.08145354849808316, - "bayes_factor_description": "If comparison level is `all other comparisons` then comparison is 12.28 times less likely to be a match", - "comparison_name": "name_unusual_tokens", - "comparison_sort_order": 1, - "comparison_vector_value": 0, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "All other comparisons", - "log2_bayes_factor": -3.617878639101611, - "m_probability": 0.07407898370615852, - "m_probability_description": "Amongst matching record comparisons, 7.41% of records are in the all other comparisons comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "ELSE", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.9094629401922473, - "u_probability_description": "Amongst non-matching record comparisons, 90.95% of records are in the all other comparisons comparison level" - }, - { - "bayes_factor": 1, - "bayes_factor_description": "If comparison level is `exact match` then comparison is 1.00 times more likely to be a match", - "comparison_name": "postcode_area", - "comparison_sort_order": 2, - "comparison_vector_value": 1, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Exact match", - "log2_bayes_factor": 0, - "m_probability": 1, - "m_probability_description": "Amongst matching record comparisons, 100.00% of records are in the exact match comparison level", - "max_comparison_vector_value": 1, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "\n regexp_extract(\"postcode_area_l\", '2')\n = \n regexp_extract(\"postcode_area_r\", '2')\n ", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 1, - "u_probability_description": "Amongst non-matching record comparisons, 100.00% of records are in the exact match comparison level" - }, - { - "bayes_factor": 0.03125, - "bayes_factor_description": "If comparison level is `all other comparisons` then comparison is 32.00 times less likely to be a match", - "comparison_name": "postcode_area", - "comparison_sort_order": 2, - "comparison_vector_value": 0, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "All other comparisons", - "log2_bayes_factor": -5, - "m_probability": 0.050000000000000044, - "m_probability_description": "Amongst matching record comparisons, 5.00% of records are in the all other comparisons comparison level", - "max_comparison_vector_value": 1, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "ELSE", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 1.6000000000000014, - "u_probability_description": "Amongst non-matching record comparisons, 160.00% of records are in the all other comparisons comparison level" - } - ] - }, - "resolve": { - "axis": { - "y": "independent" - }, - "scale": { - "y": "independent" - } - }, - "selection": { - "zoom_selector": { - "bind": "scales", - "encodings": [ - "x" - ], - "type": "interval" - } - }, - "title": { - "subtitle": "Use mousewheel to zoom", - "text": "Model parameters (components of final match weight)" - }, - "vconcat": [ - { - "encoding": { - "color": { - "field": "log2_bayes_factor", - "scale": { - "domain": [ - -10, - 0, - 10 - ], - "range": [ - "red", - "orange", - "green" - ] - }, - "title": "Match weight", - "type": "quantitative" - }, - "tooltip": [ - { - "field": "comparison_name", - "title": "Comparison name", - "type": "nominal" - }, - { - "field": "probability_two_random_records_match", - "format": ".4f", - "title": "Probability two random records match", - "type": "nominal" - }, - { - "field": "log2_bayes_factor", - "format": ",.4f", - "title": "Equivalent match weight", - "type": "quantitative" - }, - { - "field": "bayes_factor_description", - "title": "Match weight description", - "type": "nominal" - } - ], - "x": { - "axis": { - "domain": false, - "labels": false, - "ticks": false, - "title": "" - }, - "field": "log2_bayes_factor", - "scale": { - "domain": [ - -10, - 10 - ] - }, - "type": "quantitative" - }, - "y": { - "axis": { - "title": "Prior (starting) match weight", - "titleAlign": "right", - "titleAngle": 0, - "titleFontWeight": "normal" - }, - "field": "label_for_charts", - "sort": { - "field": "comparison_vector_value", - "order": "descending" - }, - "type": "nominal" - } - }, - "height": 20, - "mark": { - "clip": true, - "height": 15, - "type": "bar" - }, - "selection": { - "zoom_selector": { - "bind": "scales", - "encodings": [ - "x" - ], - "type": "interval" - } - }, - "transform": [ - { - "filter": "(datum.comparison_name == 'probability_two_random_records_match')" - } - ] - }, - { - "encoding": { - "color": { - "field": "log2_bayes_factor", - "scale": { - "domain": [ - -10, - 0, - 10 - ], - "range": [ - "red", - "orange", - "green" - ] - }, - "title": "Match weight", - "type": "quantitative" - }, - "row": { - "field": "comparison_name", - "header": { - "labelAlign": "left", - "labelAnchor": "middle", - "labelAngle": 0 - }, - "sort": { - "field": "comparison_sort_order" - }, - "type": "nominal" - }, - "tooltip": [ - { - "field": "comparison_name", - "title": "Comparison name", - "type": "nominal" - }, - { - "field": "label_for_charts", - "title": "Label", - "type": "ordinal" - }, - { - "field": "sql_condition", - "title": "SQL condition", - "type": "nominal" - }, - { - "field": "m_probability", - "format": ".4f", - "title": "M probability", - "type": "quantitative" - }, - { - "field": "u_probability", - "format": ".4f", - "title": "U probability", - "type": "quantitative" - }, - { - "field": "bayes_factor", - "format": ",.4f", - "title": "Bayes factor = m/u", - "type": "quantitative" - }, - { - "field": "log2_bayes_factor", - "format": ",.4f", - "title": "Match weight = log2(m/u)", - "type": "quantitative" - }, - { - "field": "bayes_factor_description", - "title": "Match weight description", - "type": "nominal" - } - ], - "x": { - "axis": { - "title": "Comparison level match weight = log2(m/u)" - }, - "field": "log2_bayes_factor", - "scale": { - "domain": [ - -10, - 10 - ] - }, - "type": "quantitative" - }, - "y": { - "axis": { - "title": null - }, - "field": "label_for_charts", - "sort": { - "field": "comparison_vector_value", - "order": "descending" - }, - "type": "nominal" - } - }, - "height": { - "step": 12 - }, - "mark": { - "clip": true, - "type": "bar" - }, - "resolve": { - "axis": { - "y": "independent" - }, - "scale": { - "y": "independent" - } - }, - "selection": { - "zoom_selector": { - "bind": "scales", - "encodings": [ - "x" - ], - "type": "interval" - } - }, - "transform": [ - { - "filter": "(datum.comparison_name != 'probability_two_random_records_match')" - } - ] - } - ] - }, - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAvQAAAEOCAYAAADmPYP0AAAAAXNSR0IArs4c6QAAIABJREFUeF7snQeUFMX2hwtFBbOCiIqCWTDyB8GAgmIiKCoYEEFRMSMKImYRBSNBJCmiYMAA5gCIzwT4DIuKAQMoiBjgCagYn4H/+a6v9vQOk3Zrdnem91fn7IGd6equ+lX39ndv3bpVrUmTJqucihSQAlJACkgBKSAFpIAUkAIFqUA1AX1BjpsaLQWkgBSQAlJACkgBKSAFTAEBvW4EKSAFpIAUkAJSQApIASlQwAoI6At48NR0KSAFpIAUkAJSQApIASkgoNc9IAWkgBSQAlJACkgBKSAFClgBAX0BD56ann8KrLHGGsWN+vvvv4v/n+rz8uqBv160DeV1rTifd+2113ZNmjRxCxYscN9++22cu5o3fePe3WKLLdwPP/zgfvrpp9Xa5b9fuXKl4/5ed9113ffff+/++9//BvVBz8w/8qFD7dq13R9//OFWrFgRpGliZT1POZVTJ5MCJRQQ0OuGkAI5VODf//63W2utteyMAwcOdI8//rj9/8UXX3Qbbrih/f/OO++0n9IWf47LL7/cPf/882mrv/XWW65atWpuv/32Cwad0razvI5v0KCBO/XUUw2uJ0yYUF6XKXHewYMHu5YtW7qLL77YvfzyyxVyzXy/yGmnnea22WYbN27cOPfll1/mtLlHHHGEu+666+zeffLJJ+3/0dKsWTM3YsQIg84XXnjBwH/XXXd199xzjxs5cmRQW5599lm3+eabuxtvvNFNnjw56FzpKh988MHuwAMPtPsp0z11yCGHWHu+++47hzYVUWjfzTff7H7++We79zOV5557ztWpU8ddeeWVburUqasdHu3va6+95l599VX366+/utatW5tBpiIFpEBuFBDQ50ZHnUUKmAJRoOfldcEFF7j11lvPvfLKK8UKCejLdrO0bdvWDRgwoMLgZq+99nJ33XWX+/HHHx1QovKPAhiTm266qcvGsCytZg888IDbeeed3ddff+1uvfVWg79oGTNmjGvatKlbtmyZw9jaZZddXIsWLWycMhm5mdrigR6YfeSRRzIdXubvvZH4r3/9y/Xr1y/teQ477DA3aNAgt3z5csf/K6JgIPGcYaxdeOGFGS85bdo0V6tWLde/f3/3zDPPrHZ8Yn8xUDBUnnrqKbuOihSQArlRQECfGx11FilQAuhXrVplIQO8uI466ih39dVXOz7D8+iBfsstt3S33XabeTvxOP7yyy/mZfQw0bFjR3f++edbSMFXX31lXrAaNWoUg9RJJ53kevToYQbDb7/9Zi9TYISSzkP/0EMPmWcTj3+rVq3snB988IG9vPHKbb/99gZTHEObCTW54447zPsGgADWGCiAFbB7+umnuyFDhrjdd9/drbnmmgYf06dPt88A4auuusotXLjQZii22morm8a//vrr3WWXXeY222wzC5fo2bOn+/TTT22qf9iwYW6HHXawfixZssS+23jjjc0zixa0ad68eY7+p9OANhKW8f7775unEeNq/fXXd2eddZbbbrvtbObi888/d+edd17S0A4Pl/fff7+1iXABdKHfjBfAM3HixOJZmDPPPNOdfPLJrmbNmu6vv/6yNtJ2+nfppZe6ww8/3NoCsG600UZu/vz5Vh/vP2PI74wnXksgifsH3bgenzG+wBAl072D95o2fPjhh26fffZxf/75p4030EVJp1u6usOHD3f77ruv3cfcr7fccov1if7tscce9jkwzuzU22+/vdpfBTTkHsXTzv+5b3270Ji2+mcB3aMzWdTjfuV76k2aNMlA0gM9WmbqN15u9OZe/P33392iRYvs/uQ+SAf0pRk/dEl1nYMOOsidccYZNotHSAvP1LXXXuvOPfdcd+KJJ9qz+J///MeeL/rrgZ7nEiOmXr161m6eR/SJFp7Dbt262XPEfQQsc9/zt4A+33TTTaYvsw/8neEZZKaAceA5Ziy577jfgHDGsXPnzqbV6NGj7e8C9yFtxrs+d+5cd84551gdxoHx3m233exvAPft2Wef7Y499tjV+ovxxewL92fz5s315pACUiBHCgjocySkTiMFUMB76JcuXWoADmzccMMN7oADDjDPMsAKpPBSe+mllwzkeEkCKMAq5ZJLLnHUHz9+vP3Oiw+IAZYoeEap4+EOgN5kk03s+wcffNBexumAHljguhSglhc65b333nOEU+A5AxgBE9qFN5brAWEAHWE8vgBEc+bMcUceeaRBI+2mLgXABZyBId8PXva+cE4ft8x5ePn76Xtghx804V8ABQPBt+Xdd981wyedBkVFRSVuSuCacwBTH330katfv76dn3Ho27fvajfwzJkzDbCAIoCKcfu///s/0wKoAtwpJ5xwgoFJ79697XfgaIMNNrDxQA8MIADKwwv98WFZHI9u1atXt7qPPvqoaexndDBeGH//PUbNvffem/beAZCjfWcM6QcFkMeAzFa3xLqEPGGkMm7cdxg4ACrjDADyed26dQ0+MWASCyDKmgQKBguGDQXvOv0EYL1RyP0PpPtCSEf79u1NCzTG4AJI8ShjpNKWdP0G2pk1o408izwDjCGG2THHHJMW6EszfoBxqutghADvjAdhJ4AtBrP3hGOAcu9Q0OqLL74wD71/ftDIP6+HHnpoiRh37k3uUf+sYtxgCPMcoxOzHdzv3KcYHGjtHQ8cRwHCAfhoyA0wv/fee9ux3Kv+3mW8cVZ4oKd+tP2vv/666ZDYX8bJP+cYIBgGKlJACoQrIKAP11BnkALFCnigB6rwTuMRxTPMixS4A6Z46S5evNg8aLwk8dqx+A9Qa9Sokb3Ev/nmG/Om4a3r0KGDwTQeTApAD3jjxcazzuedOnWyl7R/eWcD9H7Ku1evXq5r167FIADs0V7aA7zz0qXQH7x6Pi4fcGGNABBAu/HKAxsYJEAZ7QIIAXr+xbtKuwF9wACgaNeunevTp4+1GzD0AIdmfEaIBQAHiOPpjobcAHHpNPBwx7/MkOBFvO+++6wvM2bMMEOEmGmAKjEmH8MByKSdjAPFa4pes2bNsnEkrh/IZiaFOt44YJwfe+wxq0efmRUA6D/55BPXpUsXux4gCkwyEzNq1CjzjNIm+u6BHiPkjTfeMG/sTjvtZNCMJunuHc7n++7XcXgjjX8Zq2x0S1aX6yaG3ABujBEzDHhvAXoMRfrEuPvCMRxLueKKKwwEuRf48d7ap59+2maGUoW9YPBgiHHuu+++u1jHRKBP1nbi7DHcgHnuVe63Pffcs/iZSeeh90CfzfjxzKe7TmIIypQpU2ymCsjlPuX+4rngHuPviQd6nh+eCe4HjEWM3HfeeafEX1//9wfDi/56JwDgzdh7nf1xGE0Yrtdcc43beuut3ezZs93DDz9cAuiBcp5r7nHGzz93iUDPeTBMuEcwYr2RnizEiOvizec5uP322/UGkQJSIAcKCOhzIKJOIQW8Av5FyYuZFxsvQwAYLxThHh7o8YIxxR71ZHq48dPrHOtf8pzfv1gBekJfvHczqr73zmUD9P4FjXcfwKHgyQMo8ZZHM/PwHdPsLFKkPwAjIE/BY8vnUa8znxNOhBcSoPf9xGsLbOGd5Xx+AR7H4bkj/CFZeeKJJ2xKPwr0xCCn08BDLSDks6X4MBp/DQCHxZ2Ji5R9vL43kIAdv8AZ8E4sHmqjceXeww/QMFMD0HvgxxjCkPPrLIA2DBzCVxgXgN6PpTcK8HRybwC96e4dZhR83zEEOQ9wR0gU8Mj4ZaNbsrqMTyLQ02fuF18wUtEDYyRaMCQwEin+3MzmAJr+3gMmcwH0ydoOtHJ94u6jxY9xNkCfzfgR+pLuOomA659rb+RE2+ZDbqLrOPx9hb78vYkWD8pAP/cbwN+4cWMzYJkl/Pjjj81Af/PNN5M+Z0A6hrj30J9yyikWosOY4qWncH9iMCQCPfcwRqz/O+a/Twb0fqYv+ndEbxEpIAXCFBDQh+mn2lKghAIe6AnDwNvsQ2XwJgK+HujxNuPJjsaR+pcc3nkAGA8WoSF4z/GQ4bHD4wZAEbsKZBJ6Qr111lnHzg3w4SXNBugBQ+J3Pbzy0sa76z3LvIgBAx/THwX6aJiKBxIAmzoAAB7+0gI9nknAmnYQ1453F+AjNIL+oEcU6AHsdBp4qPUATgww8d+MCZ51wJYQKICX/+ON9wUIGjt2bDFUU4exZeaBsA+8+hhtnJM2M3NBqAQeZEKsMN58BhP6RahLNkBP2BOg5j30GFfo66GIe4NZhnT3DuFPiX2PAj06lka3aN1EoKdtRx99tBkIaIOOfv2DNxi9ptHF4XjHuXcJdQFkPTDmykPvxzzadtahcF1mD9CPMCE/E4QhmwugZ/yA5XTXSQRcnjeeXUJbMC6PP/5489AD7j7kJrooNh3QY+gRL+/X6xADTxie/52ZLv4u0UbuacJ6eMYx6nkW8KpzH3ug90Ynsyv8HeLvEX/LWC+RCPR+UWw2QO+9/DgSWEujIgWkQLgCAvpwDXUGKVCsQBTo8TgDThQAH6+sB3o8xQAwL0rgnsWfLJak8GInFhXooLDAjHh8H2MP0Dds2LA4TAb4I34WqMIDx8LMbIAekOUFzbl4uZMOkvhaQBkA4MWNh9D3gZhdDACgLQr0HjDw3NJGvNEUptIxMLL10AMPxPlivBCKAoD5cBeAGAjHCwh4ox9t9qFCyTRIhFrvIabfxBdzHowZ/qXNUaCnHz7DCiFPtMWHRDFexGOjOQWNMEA4Dt1YiAoscg5mHvCYo0VZgB7vMeDkIRkPNuEm6e4doDAd0BNyUhrdEoEegxH4Y/El4VmEADFm3HMsBAYqGZtkISHEjLMwGqjmXiXsCEOA0DG8vuUJ9DxHGMJoClDSTsYolx56gB7DO911WJyKccwsFd5wZiSAYNZWMG54wvm7gLZ45hOz3KQD+uh9i0HMfe2P597kXvT9557yITwcxwwb9w5hRdEYeg/wOB+Y6fKzO9kCfWJ/GWO/jgdnhJ+10WtECkiBMAUE9GH6qbYUSOqhx8sGWBO76uOwvSfOZ7khFARvbjRUBYj0iyujC1A5By98FvH5sI7oQj0aAagBRXiPswF6YMHnxseAYHodD51/gXNOPmcBH23EuwdgJwI9MMt1mT3gpU8dFtkRjwvk44HzITc+hMCH3KABho+HKjzowIRfcAqEAJRALLACiAOPACHtSKdBItTSn8TjOT8zCYkZQzjWx50TTgSI4pX08Mr31OV8hDkAYHxHqktfACq87YQ9EGePdt4Qoo+EGyWG3CTG0GMQ+QXMADT3FAZJpnsnse9+TAm5wcteGt0S62Jocn9TAEDGjvCr6H1MiMdFF1202l8HZlzQi/UGvgCG3HtkP8oE9Mz+YNj5GHofYpIYQ+899NG2M47E7ftFpxiNGKuMI7MMPJep8tCXZvx85pdU1/FZpDB6MGrQDi86YO8LIUvMcCRLW+kBPXEGxNf1Mw3cLxjCPsyM2R1mbyjoT/x69Jo8r7Sde4v7k+cYIxsDzGvD3yBmC9DJLyb2i2KZsSJEEEOONSMe+Jn94Bn3/fUOB9rhDQy9RqSAFAhXQEAfrqHOIAXKrAAvOTzkPu1b4s6YeOZZBMjLNtkmLEyVA5p4jLPd5Md7xwAJFucC9bz8owXwImwk8fNUHQW28YAT5x66WYzXhHbR7+gOoH6XUMCCRcaU0mqAprQV4CZmHWhOVghXYMEux3Tv3t0O4frUxUuJ0ZS4OymGDFAPzGSrXeK1fWiKj6Fn5gbY9f31x2e6dzLdlKXVLXo+xoa+0ia/8HX//fc3qGe9SGJbE9sCSO+44452bEXvwMv4YfxmamMm/TJ9n+46GKUsHsaw5YfC7B1ecwzAXO/QmqqtjAOzSRiSzDwlKzgYCOljdohZDow41mNEnQ+ZtIj2l5k+Zu0Ye7/gPlN9fS8FpEBmBQT0mTXSEVIgVgpEgT4xtWOsOpqDzvgFoMy0AOkVURKBviKuqWtIgVQKkGIXIxUDkxk4PxNT1o3FeKYwJllIna0TQqMjBaRAZgUE9Jk10hFSIFYKEMLCNDqhMsQ8q6RWgNkP8p8TEuGz3JS3XngzCT0hlIVFoypSoDIVYCaGMCueBcKISKVLqFOyXWEztZPZMdaTsAYkMbNUprr6XgpIgfQKCOh1h0gBKSAFpIAUkAJSQApIgQJWQEBfwIOnpksBKSAFpIAUkAJSQApIAQG97gEpIAWkQBVQgFAesiWFLlquAlKpi1JACkiBglNAQF9wQ6YGSwEpkEkBUvux4Jet6H054ogjbAMoFuORnrOqFbKSkLaSVKm5KC+++KLlE2dvgmhhATELJ9lkS0UKSAEpIAUqRgEBfcXorKtIASlQgQoA9KTQZCdQX/yOuJ06dbL0e1WtlAfQc052CI0WdrJlNoDN1FSkgBSQAlKgYhQQ0FeMzrqKFJACFahAJqAnfzyb3ZD3mzAUdqtkYygKWW2Af/K8k4OebECJ+ebZpGjbbbe1HP5sskO2IPLSs/sohUxCZPFgwyk2CGvSpIltvMVxbN7Dpl5sVsXmVeT1pvB/Nk6iLWwa1LFjR9sLgA2BzjjjDMubzqZEnG+zzTazXTvZTfSzzz6zrCO0k51K2SSLjbmoTwpMvPIYNtTjXGQa4fP58+dbHnD6n+y8pBdMpwce+kSgp91sEkVh12RymKfqf/R2YPMj8rBHCxpzjVT1yeOOxmyORDpF2svmRmhGn9mcidz49A+NjzvuOOs7OrLhEmOgIgWkgBSIiwIC+riMpPohBaRAsQKZgL5Xr16Whu+WW26x3XybNWtmu1YC5AA3IPnBBx8YJLPRD5twRcvYsWNd48aN7TsgGQOA2PRJkyY5drtlYy52hh0wYIBr06aNpeljxoCNqjjv6aef7mgju3r6EBh+ZzdhoJ5/J06caCkCaQ9wevHFF1uIC5tqYTCw0ywb/LRv396MA+oNHjzYvfHGG2Y8sEPn4YcfbjsL0yb6xAZg7OaJocEupOwISr1U523Xrl1KPZIBPZtFDRs2zM5/xRVXmLap+h/Vk/ScaEbBs49BAnR37do1ZX1Se9auXdsMINIpsnstKRGfeOIJ0xsj7JFHHnEdOnSw77/44gv7/LTTTrM0pGilIgWkgBSIiwIC+riMpPohBaRA1kB/0003WS5+oHPGjBkWVz99+nQDcjzf/E7B4wuQAsSJQM8OrkAz5c033zS4Z8t7H6vfokULOyc7xgLwFMJTOF/z5s1TAj2eZjzuwCft9Lv2kg+cNQCci509mUEA9PHIH3DAAeaJHjp0qBsxYoRdC2MCaG/QoIGBNd70Tz75xPXo0cO+x4B44YUX7BypzouxkEqPZEDPeaMhN2ibqv/Jbldg+9xzz7U+oEGq+kceeaQZJgD8hAkT7FQA/u+//276oB2GBdpg0DCejAs7xKIvsxNcR0UKSAEpEBcFBPRxGUn1QwpIgWIFANXPP//cnXnmmcWfETvPlvNAOJBLiA0ea0AYD3W/fv0sTMTvhOkrrlq1yu29996rAT3hG3h/KdQjbAQQbdWqlYXz8O9LL71k3vRRo0bZcSeffLIt1G3atOlqQP/aa6+Zt3zIkCH2442F5cuXWxjQfvvtZ/UTy9SpU92SJUtc586dDWoxPmgzIM0sAnDLLABAj4FBKAsFIMcQ+fbbb1Oet3Xr1in1yAboCUNK1f/EfjBjQmgMi5mZlaCkqk8oDxrh2fe7jTJrUr9+fQutoc+cg/rojfce/SgYAtTB+FKRAlJACsRFAQF9XEZS/ZACUqBYAcIwiLHGQ+0LHutDDjnEwI7wGuLN8dITR074CmE4gC+GgM+OQ3w2kAg0RwvwWKtWrWJPPkCPN3jkyJElgB7AxgN+2WWXWfVBgwaZdx5QxkPOefGO420HrgmzYZaAuPyPPvrIDAbAk3hvYL9nz54Gp0A+hb7MnTvXYsjZyZb4cIAYI4UZgk033dTCVjhX4qJYD/R8l+q86JhKj2yAHoMmVf+jevq4dzzseNJpPyVVfdYHYLSxgynt8KCO5/2qq64yoGfdwbvvviug198FKSAFqoQCAvoqMczqpBSoWgoA54As3mFCN1gciWeadJV4cKdNm2ZeWhZxAubEoBOaQfx2o0aNLHb++++/d6NHj7aYdUJPygL011xzjcXSsyiVcBcWa+I1Jh4eTzEgf+qppxq0E0YC0H/44YcG+cwuEMbjPc8AKrH1hKEQKtSlSxerh7caWOdzYs+J0//1119tVoA4cu+ZTgX0HJfqvCxwTaVHKqBHbxbtEn/PGoVU/Y/qiRHDrAmzJF999ZV9xdoE4DxVfcbw559/tvUNxNITesPxGFcC+qr1vKu3UkAKOCeg110gBaRALBUgfGPPPfcs7tsPP/zgCLtZsWKFLbgEhAlNoQD3xFnj1QeqyV5DAYwJ3WAhayagJ3YcOPYhN8wCcD4fU+7PByTj9eZfPOOUX375xUJbAFtCSYB9QnpYaIv3HTAGuoF1Qmt8wfuNUUIh4wshNswAUPgO77vPOpMM6FlAy+xBqvOm0yNVHnoMEX7wlmN4pOq/7wMLaGlrYmHhKgtcU9XHS08IFQuAKRhrzF6QwScT0HNs4kLnWD4E6pQUkAJVRgEBfZUZanVUClQ9BWrUqGHx6h9//LEtiIwWgHm33XYzLziQ7wtec7LH4O0mDCZ0Z1WAFdDGOCA8Jno+4JPwGgA/sRBTTvtpQ7RssskmlqGHFJhff/11zgY11XnLogfa4j3nJ13/s2l8uvqE6vzf//2fW7BgQXEsfTbn1DFSQApIgbgpIKCP24iqP1JACkgBKSAFpIAUkAJVSgEBfZUabnVWCkgBKSAFpIAUkAJSIG4KCOjjNqLqjxSQAlJACkgBKSAFpECVUkBAX6WGW52VAlJACkgBKSAFpIAUiJsCAvq4jaj6IwWkgBSQAlJACkgBKVClFBDQV6nhVmelgBSQAlJACkgBKSAF4qaAgD5uI6r+SAEpIAWkgBSQAlJAClQpBQT0VWq4c9dZ8mfzw1bz+VS23nprt2TJEtshs6LL2muvbVvWh+YtL2u7yVn+22+/lbV6iXpsrkT+73wt5HNnB9N8LWpf2MhIP+kXpkBYbd1/4fr1798/7CQxq+03K/zxxx/LrWcC+nKTtmJP3LBhQ9tRMVrYdObqq6927777bonP2Z3y6KOPdq1bty5zI9nF8aWXXrLdKZMVtrNnN0Z2k8y2XHPNNe7uu++2HTMfeeQR16xZs1LDMTtqsvvnJZdcku1lszqOHTPPO+8821o+VWHL+UGDBrmnn346q3OW5iAAe9myZcU7iybW3W677UwzNlFKLGUZi6KioqTnKk2by/PY4447znZVzdei9oWNjPSTfmEKhNXW/Zc/+h1zzDG24zTl0UcfdTfccINr0KCBmzx5sn321ltv2W7eqQrHf//99+6mm25KeYzf3fr22293EyZMCOt8itqvvfaa7Wq97777pjz/Vltt5Z588kljtjPOOKPEcWzWh5HELtjokKwI6Mtl6Cr+pB7ou3Xr5r755hu35ZZbutGjR9vuiV26dCnRIL7jgeAGK0vZf//9bSv6Fi1apARubsr333/fXXnllVlfgh0x+/bt6wDjQw45xD333HNZ1/UHctM/88wztgX8woULS10/VYW9997bjRo1yvFvqlKeQM/OoHjfP/3006SXTwf0ZRmL6Uee7+68I7mxljNRA0606667ug8//DDgDOVbVe0L0zff9XO9OsigDBhiAXOAeM65qqRfFOjZEZq+9+jRw5111lkmIs6ns88+O6Wgb7zxhu1YffDBB6c8BoPg9NNPt3c8TsXyKDfffLNbc801XZ8+fVKengiDxx9/3Nipe/fuJY5jV/Px48cbH+GUFdCXxyjlyTk90Ldv3959++231ipuoP32288de+yxZtF98MEHFqZwxx13uCOPPNJ17tzZvseCXXfddQ3+Bw8e7GbNmuWeffZZ98UXX9i26sAx3nZfmAn47rvv3EUXXWRhN9xk9evXt1AXjIiddtrJjAg85SNGjLDzYGGztTzTTQMHDjTPffQaPKg77rijhaxwsw4ZMsS1bNnSDRs2zG288cZmgBBSMmfOHHuYAfe77rrLbb755u6rr75yNWvWdJdeeql9j+eWvvTu3bvE6AwfPtyO32KLLWwWYPr06W6fffax83/22WfW5k033dSNHTvWYSn//vvvphttmDFjhl0DL3mbNm2sn2jzxx9/uKlTp7oBAwbYg4b2nJ+CLmPGjCluA9B9//33u6OOOspCgjBY8AZwPX7mzZvnRo4caefeeeed7dzMhPAZeixfvtxdf/317txzz3Unn3yy++uvv2xcVq5caWONh/6TTz4xHWk7/cf4io7Fvffem9Ud+8js9bM6TgdJgaqowKQb2wjoAwa+KgFpgEwpq1Yl/TzQE8rKOw8P97hx4xyACyB7oGd2vFWrVhYqCp9ceOGF9g5k1hoWeeeddxyeeH/cGmusYU6/888/34wEgJ6oBtiCwvuU964vMBYRCTAGdfCU43HHUOD9S7s4//rrr2/X5j3866+/2vWmTZvmnnrqKWtvu3bt7L3M5zAF8M77Hu6ClwB6ZhTWWWcd+8FxRdteeOEFR9gOGsBgHJtY5KEvj6etEs7pgZ6bEGiuV6+eebmZjgLYuZkA7qFDhzqO5SHhRuRGIw7+xhtvNCAGRqk3c+ZMxw3PDUy4RzQmnVAbPgNCqXPYYYe5jh07mkVJKM8RRxxhLzvg8vLLLzdw//jjj81bf91117kddtjBjoleA7jFs047eGAefvhhC7kBgLfffnvXr18/BxADsx06dLAHFePksssuM0sdzzkPEefkHNRNtMhpM9cmbIZzcAzX5cHEkOFhJDwFgwSjgRkIrnPAAQeYcQFM8zA2btzYAJ7rMttx7bXX2vmAf7zozDKgBcdhMEUL0A+w//LLL6YdhsQJJ5zg8CLQlyZNmpixxXf8ccIYoy2cE2MCHV555RUZa257AAAgAElEQVQ7B8YDfeHzCy64oBjomT1BA8YVz0V0LLKNsRfQV8JDrEsWjAIC+rChqkpAGqZU8tpVST8P9MA271veiQDtDz/8YL8D9D5Uhs+Y6T/00EPd/PnzjSl69epljkIcY6yNgGmAYmAetsBZx8w30Mzn8AfvbmCd93eUfXhP42jj/Y6zjUJUBOfGKdq8eXMLRcbRybFwCRAPIxGKy/8Bf+AcRyJMhhMRByMOV97nAD3lvffec7vssotjbR6h0/AMTICxgjHw6quvCujL4+HKh3N6oGewuSnx0OI5xktcu3ZtA3o88niB8YDzkJx44okGtG3btjX444bkJgEw8RwDgrfddluJ7gH53KjAMxYlMwLEdfHwvPzyy+6hhx4y6zIa5sH1uTYW65577mmGApAMfEev4UNu8K77GHqAHg+0n17jGIwEbnA854899phZrXj8PdCfcsopBv48XNEC0PPg8MeQdhOzz8PFw/7666+7W2+91XRiRoIQFx4m/mDwMPLg+5AbvO48ZKeddpqdHi8//eeBfeKJJyxWj/PyRyYxph1d+QPBD8YT56etnJPzTJkyxf5g4E2gcG3Oufvuuxu403/+mHhjBSud2QsP9LSJPwS+bxhOZQm5EdDnw1OtNuSrAgL6sJGpSkAappSA3gM9jjfW/THLDETDN7zfAHrWzDFrDYiz3o13K06zAw880HjFh9zguMM5xzuXdynvy59++snVqlXLgB4egi2AasJfOD9M5QsQD8fgcOQeBvphHvgGpyUOywceeMCujZHAscA97MC1AHqcf1yH2XWME89QUaAnbBrHHqzDMdSnvQq5KY+nKQ/PmSzkxjcTaARUvbXpgZ6HAJD0n3swBmh5aAhRAayjxUM/U05AsIdOPPNAMFNbwHoUIv3CWAAeq5SHzgN99BqpgB4rlvAeCsfg6SdsB2j1cfbMRHAM18BQwSpPXHwC0PtzYcTw4ALRFA/0zE5gdLC+AO85D2Ei0NM3HkYfx8Z0GQ8gsx1+USwgj7aJQH/SSSeZ1xwjAvDnDwALYNCednNd2vjRRx8Vy/7888/b7AFAjweCmQXaRKE+enqg99dDI2YYBPR5+LCqSQWvgIA+bAgF9NIvWwU80MMwvJ8pcAiwy/sWoMcTj2ORGWje5UA74S6JQA9s8772zk1CX3Cu8d6MxtDDPUQEeGenbyvHEG8PsPMuxkFIW/jBwcg1mWnne5ynvsBTMAtATwgs5wf44QH6xIx6FOh9DD3x9rSVGXkMEwF9tndNgR9XFqDHywtAYmEC1gAuNxcPAWCcDOiRiRuL+HVCbrAcgVOAEi82lisWKJ5jvNY8cMSq8z3XAqqxfFMBPVNOPAhRD30yoCckhdh5HjDCZABe76GnH0x1YeFGSzZAjyHAlBnnANjx9gPPPNzei84DzQupU6dObrPNNrOHDCuaNmcCeqx1ZjIoGBzU2WabbWxdA3qiOeFSwD3/MkNBeA8eCYCecBv6jqHx+eefm0FDiFU6oMfbwFgQtpNtkYc+W6V0XFVUQEAfNuoCeumXrQJRoIcxWDAPhOMYYwErQE/4KZ5uwl6B6a5du5bw0BO1gFeeuHs4gYgE3vNEIwDWhN1kA/SsA4RnKITNEOMOy1AwInBY4vBjlp0Zf+5z1uPxvr7nnnuKQ2649gYbbGBhybzn4YJsgZ4IBt73/JtYFEOf7V2V58dlA/R4o/2iU5+2EoDl5mfBCaEwfpU3QI/F6VNDRbvPZ3ioiS8HSjmO+kw/Ee4BXAOhWLeczwMxxyxevNgW0GKx8jBFr0HICtNleNr53MfQ87D6Ba4YEyywxRLGQ88Dxv+xkP00GoDtITcR6P25knnoiT0nxh5Q54HkHBtttJHBMCE/zDQQyoTBAiSz+IUZCT4HsomPxwrHW5DKQ097/KwCbQCy0YGYvxUrVlj8PqE69Ivx8FY8xghTfzzIaIPu6M0fNhbL8kcqmrYy6qGPjkW2K/gF9Hn+wKt5laqAgD5MfgG99MtWgSjQ867DgeedhbzPmJ1nthtnIh5w3t28G3l/8l7lfcp6O96fON4IYSVuncL7k/MRyhMFes6F8ZDoofcgD7gzIwB3EOmA05H1dRQMDcJ/SOJBIYwZfvFrBomIIBaedz8LcHnv0x4fKgtb8H+MAOrhxcdDz7oBnIGE+2JMwBwC+mzvoip0HDcT01DcRMSaZSp4vplW8mkrucG4Sd9++22DYF+4WXkAMSJ4OPAwE18PrPIgJV6LBxCgZ/FLpsLNjkVLmwkpAmZ9e/Bi8300bCXT+aLfE/PPg4rRwkNJOAwWPe3jQaZPFOLjyOBTHptY+Sw3eOGjhZkC/sD5tQ0YRuny0ycbi2y0ENBno5KOqaoKCOjDRl5AL/3CFFi9Nu9n1r0Ry564uSPON7LfwB8UeMR7xctrI0hmA3Bg+mv6FtNODBDWBhJqQ+gQswusJ0yXK5/61IWr4CxmIgT0ub6Lquj5mFZieivbNIi5lgnrFk860M2iUMJ0sGT5nIc1XV7aXLelIs+H8YTBglGB0cEfKkKD/HqGsrTFb6oRrft5tVZlOZXqSIEqoYCAPmyYBfTSL0yBwq5N2k3W6vmC05PYetbthRSF3ISoV4XrYiXyg5e8sgqzAnjlscgJc8HSJtQFuGfBSlwLMyqETJHDlqw4iV78XPRbHvpcqKhzxFUBAX3YyAropV+YAoVfG6An/p4wYDL4EHoTWgT0oQqqvhSIoQIC+hgOqrqUMwUE9GFSCuilX5gCqp1MAQG97gspIAVWU0BAr5tCCqRWQEAfdncI6KVfmAKqLaDXPSAFpEBWCgjos5JJB1VRBQT0YQMvoJd+YQqotoBe94AUkAJZKSCgz0omHVRFFRDQhw28gF76hSmg2gJ63QNSQApkpYCAPiuZdFAVVUBAHzbwAnrpl60C7x81O9tDkx63+1NNSnxOpjgSSkQTZ5AOkj1ncrEwNaixgZUVQx8ooKpLgTgqMP3I892dd9yZt11jt8APP/xQ7SujAtKvjML5ar06WN7ofC0C5rCRkX75o9+XR4cB/dZP/AP05KFnQyc2kGJDSNJcU9gV/ogjjrC87oA9e9gsXLgwTIBKqi2gryThdVkpkM8KsJ02KUDzteiFGzYy0k/6hSkQVlv3n/TLVoGVx6YG+lXOuWppTsT3Gz72D9CT7pldY+vVq+e++uorA3q89a+99lrxjrAPPvigbdR4/vnnZ9u8vDpOQJ9Xw6HGSIH8UEBAHzYOAhbpF6ZAWG3df9IvTIGw2jm9/zql8dBnQ/SPlgy5ueGGG1z9+vUN6Pfaay83ZswYt88++1iHb775ZrfDDju4Y489NkyASqotoK8k4XVZKZDPCkycONGmKPO1zJ071zVq1Chfm+fUvrChkX7SL0yBsNq6/8L169+/f9hJ/le7xnFhITe/TUoN9AcffLAbOHCg23fffe1ql19+uc1MC+hzMnQ6iRSQAlJACkgBKSAFpIAUcG6TE8OAfsVDqYF+ww03dC+++KJr1qyZ7TQ/ePBgk7xPnz4FKb089AU5bGq0FJACUkAKSAEpIAXircAWncOA/psHUwM9yhFDD8jPmDHDTZ482Y0bN85NmDChIEUV0BfksKnRUkAKSAEpIAWkgBSItwINuoQB/cIHVgf6bbbZxnXp0sWEO+eccyyzDWXJkiXuqKOOMm99IRYBfSGOmtosBaSAFJACUkAKSIGYK7DzyWFA/8n9JYE+mVwbb7yx22yzzdy8efMKWk0BfUEPnxovBaSAFJACUkAKSIF4KrBHtzCgf+/ezEAfF+UE9HEZSfVDCkgBKSAFpIAUkAIxUmDvU8Ly0BdNENDH6HZQV6SAFJACUkAKSAEpIAUKTYEW3cM89DPvEdAX2pirvVJACkgBKSAFpIAUkAIxUqD1aWFA/6+7BfQxuh3UFSkgBaSAFJACUkAKSIFCU6DdGWFA/+xdAvpCG3O1VwpIASkgBaSAFJACUiBGChzbIwzoHxsroI/R7aCuSAEpIAWkgBSQAlJAChSaAp3PCgP6B+8Q0BfamKu9UkAKSAEpIAWkgBSQAjFS4NSzw4B+/BgBfYxuB3VFCkgBKSAFpIAUkAJSoNAUOOucMKC/Y7SAvtDGXO2VAlJACkgBKSAFpIAUiJECF5wblof+9lEC+hjdDuqKFJACUkAKSAEpIAWkQKEp0Pe8MKC/daSAvtDGXO2VAlJACkgBKSAFpIAUiJECV5wfFnIzcISAPka3g7oiBaSAFJACUkAKSAEpUGgKDOgZBvRX3y6gL7QxV3ulgBSQAlJACkgBKSAFYqTATReEAX2/4QL6GN0O6ooUkAJSQApIASkgBaRAoSkwrFcY0F94m4C+0MZc7ZUCUkAKSAEpIAWkgBSIkQKjA4H+HAF9jO4GdUUKSAEpIAWkgBSQAlKg4BS4+8IwD/1pw+ShL7hBV4OlgBTInQITJ0501atXz90Jc3ymuXPnukaNGuX4rLk7ndoXpmW+6zf0nofcFltuEdbJcqz9zdffqH0B+kq/APGcc3tsvZnr379/2En+V/uBNEC/yjlXLc1V+P5kAX1OxkEnkQKxUODggw92devWLdGXRYsWuZkzZxZM/4DfTTbZxM2aNSurNhcVFbmmTZtmdWxlHHTccce5SZMmVcals7qm2peVTCkPynf9Zv+6flgHVVsKxFiBG7u1ydnf58kXhXnoOw0t6aHnPfjDDz+4v//+O3YjUK1JkyYYMSpSQAqkUGDKlCmuWrVq7ssvvyw+Ys6cOW7EiBFBml166aVu7bXXdgMGDAg6j6/86quvOoyPP//8c7XzXXXVVW7HHXd03bp1y+paAvqsZCpYIM13YM739gnow54P1Y63ArkE+qcCgf6o/wH9Ntts48aPH28gv+6667rnnnvOXX/99bEaCAF9rIZTnSkPBQD6qVOnuttuu63E6QHyAw880LVv39517drVdenSxbVt29ZdeeWV7rDDDrNjqXvdddcZTI8ZM8ZtsMEGbvHixe6WW26x82EoDB8+3N133312/G677eaGDRvmfvvtN1e7dm03Y8YMt+eee7qNN97Y/gAxjdmxY0d3wQUXuBo1arj58+fbdanfsGFDt2zZMtehQwf7w7X99tu7FStWuEsuucQdeeSR7qCDDrIwmnXWWcc98sgjbvDgwSnlEtCH3Un5DqRqX9j4CujD9FPteCuQS6B/PhDoD/sf0N9+++1uvfXWc6eddpq9Gx9++GHXunVr89bHpQjo4zKS6ke5KQCUA8FLly4tvsbo0aPd7Nmz3QsvvGDQ3qZNG3f33Xe7Z555xj355JOuZ8+e5gkYOXKka9WqlRsyZIgB+LXXXmtg/9hjj7m99trLPPTnnnuuATxl7733dpybc/DZCSecYCC/cOFCd9ZZZ7nmzZsb5E+ePNn+IHGeQYMGWSjN9OnTDeZbtmxpx3bv3t3169fP1axZ082bN8++wyDYY489zAjZb7/9BPTldNcImMOEzXf9BPRh46va8VYgl0D/SiDQt/wf0POu/Pe//23OtDXWWMO9+eabrkePHu6dd96JzWAI6GMzlOpIeSkAsP/888/ugw8+KL4EQM3vJ510kuvdu7dbsmSJa9eunf2hIPYcD/0uu+xiP8Azx1988cXu9ddfd9ttt50ZCPwxAfIBel8A+lGjRrn999/f1atXzzzpgDfHvfjii3Zupg47derkdthhBzMA7rzzTjd27FiHV51j+f3zzz+3UJ6NNtrIvP6E4rRo0cIdfvjhrk6dOmYkpIuRl4c+7G7KdyBV+8LGV0Afpp9qx1uBXAL9G73DYuibD/knhp533rRp04pn2j3QEz4blyKgj8tIqh/lpkCqkBsueMwxx7grrrjC4uv5P7CNJwDYx/InFIdwl6eeespCYz799FMLn8EzT8hOMqAnFAcwb9CggQF9s2bN3IYbbmhAz/+JlWeaEG/DoYce6u6///4SQP/EE0+4559/3kJ3uA4Lek855ZTiGHoWBeHN90B/5plnOn4SixbFlv2WEjCXXTtq5rt+Avqw8VXteCuQS6AvCkxb2fR/WW6YGf/pp5/MsUboKe9PZrzjtDhWQB/v50q9y4ECAD2hNcTg+cIfAWCZz19++WUD66FDh5pH/Pjjj7d49XPOOcedfvrp7qijjrIwm7feesvi6Z9++mkDaoCdmD7CY6Ie+nRAz7mBfDzuW221lcXO45HnB6864T19+vSxuHuOvfrqqw3kSQPoF8UmAn0yieShD7tx8h1I1b6w8RXQh+mn2vFWIKdA3zPMQ9/09n889LyP+bvHu/PCCy90xx57rK2Bi1MR0MdpNNWXclGAqTrCVKLlu+++s7j0XXfd1R1yyCGub9++5qE/8cQT3UMPPeT++OMPt2rVKgvBwVv/+OOP22JZjADCd4444giLj2dxKwCPl51CCE06oMdrjoGB4UBZuXKlefmJm8dzz3WJnceAqFWrlnkfuAbXE9CXy+2R9KQC5jCt810/AX3Y+Kp2vBXIKdCfGwj0o/4Bema5WZuGE41kFISk4lyLUxHQx2k01Ze8UACIJ76d8BoAfv3113fLly+3thH+8u233xa3kz8yv/zyS9JUk+k6A5wvWLDA6m255Zbu66+/tmuRjuv7779Peq3SiCMPfWnUWv3YfAdStS9sfAX0YfqpdrwVyCnQnxUI9HeUzEOPE+6zzz4rTkQRp5EQ0MdpNNUXKZAjBQT0YUIKmOOtn4A+bHxVO94K5BToTw8E+nElgT7Oygvo4zy66psUKKMCAvoyCve/agL6eOtXf6993OyiMNAIUyh97SZNm6h9AQJLvwDxiFfvfHTOdoot6h72nDW9R0AfNpqqLQWkQEErIKAPGz4Bfbz169+9fkZgfvr92mEiBNTW/RcgXgFkWapK41vUNRDo7xPQhz0Nqm0KEOfMbp+kR6rMwkZCxFaTA70iC7nS2WjJb5oUvTaZVkgZxQ6svhBrzgp0Ujzmsuy8886O62XTf9/mJk2auG+++cbyuWcqrJRn99dsjs10rmy/pz+kriyvlFsC+mxHIvlxVemFG6ZUYer3aobc2H+vquZaDf2/8pAmq3Pq/stKppQHSb/80a+oSyDQPyCgDxtN1TYF2KWT3UA7d+5cqYqwS+nmm29uedArsrBzKjlf/YLQ6LXZOIkdTsnO4guGxx133OH23XffnDaT7DP169e3vOyZim8zGzW98sorbvjw4ZmqWPYaNqnAcGGTKDLdlFchz/348eMN5DHSyMBz/fXXF1+OTaQGDx5c/DuLblesWGHpukiv+Z///Me+w1ghpWaqIqAPG0EBQbz1E9DHe3z1/ObP+BYdHwj0jwjow0YzRW3S6wEj22+/vUHGJZdcYvmx+QxvNin3brzxRvPQAlYACyBGmj8Ag9zeZAQ5+eST7YcUfltssYVtEjB69Gg3YcKEpFc+77zz3AEHHGDZQNih89FHH3U333yzXRcPMWkG2QCInTTJNQ74AWd412fMmGE5vdkMCHjq379/0msk6xu7cnqgZ+OeU0891dIl0b8bbrjB2n3vvfeaHlzv1ltvtTRKbCC0cOFC2+Hzxx9/tN1Ily5dWuYxiQI97WcXUwrpD8mLTorDtdZaqzgFIznNE8cj2cWpN3/+fGv3s88+a1oxfpMmTbKNjhjfbt26GXCS+YVdUwHRyy+/3MbaAz0bLOHlvuaaawygAXo2ULrpppvsHvjoo49s4yP04x4g/zpjxAZOiYUxpl1kk+EeY0Olo48+2u200052T2UaW/LK02buA4CeXVuTjRHH0Xfuz0WLFhks9+zZ0zLNvP32265Ro0aubdu2Nn5ovMEGG1juW8ppp51m+pK3vrTjSrtIu8U5uG8efvhh17p1a/PWJxYMJLRg1oGc9xgeaPHf//43470koM8oUdoDBATx1k9AH+/x1fObP+Nb1CkQ6CcL6MNGM0VtwJRNdMiTjfe6Zs2aBtQk/Od3wAMAJuyCfKGbbrqp7cLpgZBc3wMHDrTte/E4A+Bnn322gT0Qxu/JylVXXWWb+1x22WUOzzCgzU6cwDM/ACwwBiwC2hgHXB/IJlc4IA9g03bCRJKVZH378MMPrT9A5cyZMw0m6TNtps8AF4YEeuDVpd/ANt5ecpzfcsstbsSIEWZw8G9Ziwd69KNf9BWwHjlypG1EBLACycAfsJdsPJJdGzAnbzrtB3BJncgYzpo1y3Kzs3kSmxuRV50QEbzX5H5FV64N0D/wwAPmKQZQ11xzzWIPPcYGII/2gD6///XXX+ZpBuQZI4A9sXBOUkYCz8AvWgLZGBPon2lsOZ424+UG6NlZLtkYvfHGGwbRXO+iiy6ynVmB9x49etjOsIQajRs3zoxG8sNj4LAJFAXN2PAJo4PUlrSLTafQ6Nprr12tTxzrU1HSd0K4uDcwkvz21exKm1gwsmgnmmMQ0TYMyl9//dV0mDhxYspbSkBf1qftn3oCgnjrJ6CP9/jq+c2f8S06OhDonxDQh41mitrADXHGAAYb4+CBBjT4zIMMgAIUAmMff/yxgT7gQe7u3r17G3zjsWfTnmgYCfHRgPMnn3yy2tUBeuCZhxSvO0YERkAqoMcrC/jXq1fPduUE/vHA4zmnXrKSrG8YGAD95MmTbeMh4JQCtAPWgC7GBR5pjiM8gjpogDea2Qu8yoRJoENZiwd6wJT2c30Al5/27dsbXAPPeHCZ5Ug2HnihEwvjd9dddxlcAuRoi9HEeTBOPBzTT7z3ePI5DmDGaw3IAphozIyJD7lBE7RmnDAwAGCMACATT367du1SSsH5MUgAZHTnPIy/B/pMY5sI9Bg5ycYIUGYGAuhnLAH6L774wu5R+k5fMZLQAoOMGaLEtQR46Xv16mWzEQA7ddi9LrGgqTdeuIcwaNGU4oF+zpw5Jao1btzYDDRCmriun9HCOOCazB5wj6eKwRfQl/VpE9CHKVcY+gnow0ZZwCz9slWg6MhAoH9aQJ+t1qU6DhgBfAA5vKZ4KPEU8jlQSQHE8IoTjoCHEYDHiwvcA4FRoCccA283BcACfAC4xALQ+V0yMST+9a9/rQb0eIEJrcFDn2mnzmSdTtY3YucBdeAII+W1114rrkrf8CSfccYZBmV4goF5D/SERuDxxrPrw5NKIzbec3YoJbwHiCXkg9kOPLx8jkcXwPdA7+PFaVey8Ui14BNDCi8z49OmTRvbsIH24rWOAr0fS64JXGLUcR9Qj9ARZgqAbmLogXK82bTJF4wbDIZs1iRwfvrFedmBFUPJA32msU0EemA42RhxvzErQ3+TAT3t5F5Ff+C6Y8eOSYcPmGc2gZAg+o6WiQWPvA+TAdK5V1ibQMgW3zFrlAjm99xzj33m4+R5VjgHG1FRj3FjVgtdMar5SSypjNfS3IfldayAIExZ6Remn4A+TD/df9IvWwWK2gQC/RQBfbZal+o4D814aPEUAtl4hoEfPuvQoYPr06ePecSBpExATyYSwmSAQUJxUnkcUwE9nuGvvvrK4JNr/f7772UG+mR9e/fdd61veHLxFgO8ABVQjTcWoCJsA48y4Emcc66AHk871+LcTzzxhKMtX375pemMZnwO7BGK5OPFiV9nXJKNR6qBfvDBB20czz//fAtJ2n333c0rjXc9E9ATrkJbAHc8+Hiy/aJYjB/i/QnlwahZsmSJaZUJ6DmWGHYMRYyz7bbbzgyYsgL9Pvvsk3SMUgE9Y80sAoUwK+AZCGcGxxeMWWZgMFzJ7AOUY9wA4MTEJxaMQe5TCuPGy5BZAAwBPPpcj2cGjTAwKJwTA9jPCqELxizjzyJhZnzoW6oiD32p/rStdrCAJd76CejjPb56fvNnfIsOCwT65wX0YaOZojYZOoCbWrVqGbxccMEFFk6CJxPYIIYaQCR+ORXQA3yE3FA8wANNwGA0u0e0CamAHphlQSaxyHgvv/vuuzIDfbK+AV14/fHk0m88tVyL8CGMF7zwXB9DgvjrOnXq2EwFsebeQ482eLxLG3KDhxo49wWvNwsw8RizIJWQJdoC7G622Wa2qBOwx6hINh6pbggWHAOneInxZBOnzbigZyqgJ9sOsMnaCIAeKGV2hR8+JwQJDTB4KNwrwC86ej1TtYd+Y1wRJ859QSgXXvCyAD2aANLJxgiwZgaGmQvvoUdbjDW836xTINSJUBvCXljM7QvjghGKQUlIVTaLVH1dZhu4HgtjCVdipoPQMcAdCOfcGDGcG9D318Xo8ovG0ZN1E4yzgL5c/tQphj5Q1nwHKgF92ADn+/iqffkzvkUHBQL9SwL6sNHMUJtQG6A2Wgg5ADj94r9MDQCWyFiDZx5ooR4AjYc4sRB+ADAnK0Af4SjJFlgmOz7TNZL1zZ8HYGfhYzTOH0MGLy3gSCYd4C4Kf5l0SPc9fdt1110di3Px1lOAeEJ9CO3AU8y1k6WVjI4HMEjMdWLBe865y6vQPtpPfHhiSAlhNRiG0bJy5UoDZDSl/bNnz85JnvbSjBFtRnfGkLUFtBFjKlo4pjQQn0xfdMEbnyzHf7rxIAzJe/HTHScPfdhdLSCIt34C+niPr57f/BnfogMDgf5VAX3YaFZAbQ/0eIh9YVHltttuu9rVybaSK0iuiGtUgHylugSpIFmYmliIw0+2WLZUJy/jwRhWGGLRgleeNRr5UPCAYziRASjZQu18aKOAvvxGQUAQpm2+6yegj/f45vv9V5XaV7RvIND/W0Af9rRWQG1CDyipPO8V0ARdQgqkVIAQLBY1+5mRQpNKHvqwEatKL9wwpZLXznf9+nev72YXpQeNp9+vXR7SZHXOfNdP7ctqGFMeVJX0K2oWCPRvCujD7jbVlgJSoKAVENCHDV9ZX7jHbP5d2IWzrE1CAULS8rXke/uqH3iObaCXr6Ws919F9UftC1O6KulX1Djs71TTdwT0YXebakuBUihAXD8ba5Hphkw8FOL2SVP5/vvv29oIFpyWprCAlgWqpRA4Q0EAACAASURBVI0xL801KuJY4vdZVFva/oe2TUAfpmBZX7jTmr7naq31R9jFVbvcFbip3j87YudrKev9V1H9UfvClK5K+hXtEQj07wnow+421ZYCpVCAHO1kuCHdpM+HTlYfUlSSetKnssx0SrLRsOCUzC9k2PGZgjLVy+fvyVhDznmy/lRkEdCHqV3WF66APkz3iqotoA9TuqzPR9hVs6+t9mWvVbIjc6lfUcNAoP9IQB82mqotBUqhADn6yaHORkakrCSjTSagJz2j3zGWlI1+MyxSOZJPn7SR7Nxav359y4CEx560pBgMpNnkOLzeGAykFF1rrbUsow6pHn0GGmYJ+I7Ft4sXL7ZZBHYP5jOyFZEXn9z7NWvWtDzyzAYwq8BMA+k1yVpEu8inz+ZoXBNA5/ykbGVvAL5j517KlClTLG1ntD1snEZ6T9JxAvak+GRGg7SULAznczYRo/1kryHVJ6lDSZfJYmYyQT366KO2E2+y+qmGSUBfihs4yaFlfaEJ6MN0r6jaAvowpcv6fIRdNfvaal/2WpU70O8YCPTzBPRho6naUiBLBQDjadOmuUMOOcTgFzAG8NMB/aabbmobV7GbMLBN/nvSagK+eOjxZr/66quWgQeQZRMvNmxicyU2e+IaQDiwTD55Zge22mqr1TaAYmagRo0alsseyKZdxPaStx/w5nNChNg0io2syA8P1JMzn74sXLjQgJ8c/T77Dtdkx14AnBkF6mB8YMSwBwH56cmQ49tDWlB2tz3iiCNsh2M2A2PBLRDPZlL0kx92SPa77zZs2NA2DKNf7Alw+OGHp6wvoM/yRi3lYWUFAgF9KYWupMMF9GHCl/X5CLtq9rXVvuy1KnegbxAI9AuzA3pSeJPEIhqmi/OMsNds05qHqRZeu1qTJk1WhZ9GZ5ACZVMAcMfTzqZH7GBK6kn+mKYDerzseKaBVgqedgCa3PMAOEBPyA0bimEsANzMAPBZ3759i3dPxTMOULNjKjsWs8NttAD/eLXZKArPOt5ujALOwU6s1MNL3qtXLzdq1CiDZzz4eM+BbdoCaDPzANBzLaAd4GZvBI7hO9rBxlf80C+O8e0hXShAzw7C7KiLPhgzQD/tefzxx23jLIrffZk9Bkivio7MGHA8xyTWZ8ZCQF+2+zZTrbICgYA+k7L58b2APmwcyvp8hF01+9pqX/ZalTvQ1wsE+sXpgR6nIjPaffr0sXUxI0aMKH6f4kgj5Tlgz34yOOnyuQjo83l0qkDbAF6sYnbLJSRl8803d61btzbPeaoYejzx7DfAjrMUdkoF3IHuKND7GPpx48a5ZcuWGdR36tTJvfbaa8XKPvvss469DIBdQnV84QF+44037BoAMuEzAPQzzzxjoMyDDZDj/QfogW5+Z6ddgB6AJrVqFOgxLCZOnGj9wjBg7QALd9mtl5z+Xbt2LQZ63x4P9Jyb/uHhx+PPTAReejZW4zsK4TsYNRgDhAsRfoN3Ac8+hkNifb/TMgaSX7sQveWok68lri9cAX2+3nEl2yWgDxunuD6/YapkX7sq6VdUJxDol6YHeqID2C+GTRfvv/9+A3rer3ACM/9sePrggw8aQ5x//vnZD1IlHCmgrwTRdcl/FCDO/K677jLPto9bJ3wEK3nddddNCfTAMmEzeLPZiZWHEFC/6KKL3HrrrWdhLtFFsR7ob7/9dgubadOmjRkRwDRATsx5ItDTPjz/b731loXXYDSwQRkPPx77QYMG2YMPtHPebIB+wYIF1jbi9jFIuObxxx/vDjroIDNgCKdh1oHzJQI9O+OiFR4DwnOYIcAAwFPPjMTHH39s/aF/GEWJQM91EutzzlSbXimGPuwpLesLV0AfpntF1RbQhyld1ucj7KrZ11b7stcq2ZG51K9o00CgX55dyA1r2HDw8V7H6UaYLbPwFEJ32SiSNWz5XAT0+Tw6MW8bDw7x6MSc+0IIS6NGjdxLL71kwH/jjTcmzXKDp5sHDK/+/PnzXefOnQ1wWWwKXPNv1ENPeAlhPDykjRs3tim0b7/91nXo0MEMiJdfftlAOlqIPScMB2v9559/NpgmPAbP+JprrmnXJiSHMJhsgB74X7VqldW98MILrd0YDX/88Yd9Tpvw1qOJb0/UQ8+xdevWtdkMP9tAW+gDbSH2jz4z45DMQ5+sfqpbTEAf9vCV9YUmoA/TvaJqC+jDlC7r8xF21exrq33Za1XuQL9eIND//A/Qk/565513LtFcHFrMolOiQM+xzH6TpIPCe5YZawF92H2h2lIgpQJkcQGOfe56DgSaiXlLt0NrnTp1LEtNKu904gWBaOA/WohRxyvuZxYyDRMx9HfeeaebM2eOw1Pv2wfEY5gQ1oPhwMKc5cuXpzwd12UKMNoeZjPw+BNqg/c+XUlWP9nxAvpMI5r++7ICgYA+TPeKqi2gD1O6rM9H2FWzr632Za9VuQN99UCg//MfoGeNGeGq0cJsOzPwiUDvw2WJBuCdOnjwYDuGOPt8LvLQ5/PoqG2xUcADPSExhVAE9GGjVFYgENCH6V5RtQX0YUqX9fkIu2r2tdW+7LUqd6D/KxDo1yx9yA19IoYekCcNNe9tQndJ3pHPRUCfz6OjtsVGAbLN/Pjjj1l79Cu74wL6sBEoKxAcs3nqzENhLSpZm/Srs2eHvShz2Z7Ec+V7+6ofeI52ig24Acr6fARcslRV1b5SybXawbnUr+iHsL9TTTfKHuhJekGWOQrr2shsQyHElfVtmWbAw1QLry2gD9dQZ5ACsVOANQrVq1fP237NnTvX1lrka1H7wkZG+km/MAXCauv+C9ePrGu5KEWLAoF+m+yAPllbyW7HmrZ58+bloivlfg4BfblLrAtIASkgBaSAFJACUkAKlFaBovcDgX73sgN9adta2ccL6Ct7BHR9KSAFpIAUkAJSQApIASkQoICAPkA8VZUCUkAKSAEpIAWkgBSQApWtgIC+skdA15cCUkAKSAEpIAWkgBSQAgEKCOgDxFNVKSAFpIAUkAJSQApIASlQ2QoI6Ct7BHR9KSAFpIAUkAJSQApIASkQoICAPkA8VZUCUkAKSAEpIAWkgBSQApWtgIC+skdA15cCUkAKSAEpIAWkgBSQAgEKCOgDxFNVKSAFpIAUkAJSQApIASlQ2QoI6Ct7BHR9KSAFpIAUkAJSQApIASkQoICAPkA8VZUCUkAKSAEpIAWkgBSQApWtgIC+skdA15cCUkAKSAEpIAWkgBSQAgEKCOgDxFNVKSAFpIAUkAJSQApIASlQ2QoI6Ct7BHR9KSAFpIAUkAJSQApIASkQoICAPkA8VZUCUkAKSAEpIAWkgBSQApWtgIC+skdA15cCUkAKSAEpIAWkgBSQAgEKCOgDxFNVKSAFpIAUkAJSQApIASlQ2QoI6Ct7BHR9KSAFpIAUkAJSQApIASkQoICAPkA8VZUCUkAKSAEpIAWkgBSQApWtgIC+skdA15cCUkAKSAEpIAWkgBSQAgEKCOgDxFNVKSAFpIAUkAJSQApIASlQ2QoI6Ct7BHR9KZCHCkycONFVr149D1v2T5Pmzp3rGjVqpPaVUQHpV0bh/ldt6LgH3RZbbhl2knKs/c3XX6t9AfpKvwDxnHN71N/M9e/fP+wkql1qBQT0pZZMFaqaAgcffLCrW7duiW4vWrTIzZw5s2CkAH432WQTN2vWrKzaXFRU5Jo2bZrVsZVx0HHHHecmTZpUGZfO6ppqX1YypTwo3/Wb/eN6YR1UbSkQYwVuPL1tXv99jqv0Avq4jqz6lTMFpkyZ4qpVq+a+/PLL4nPOmTPHjRgxIugal156qVt77bXdgAEDgs7jK7/66qsO4+PPP/9c7XxXXXWV23HHHV23bt2yupaAPiuZChZI8x2Y8719Avqw50O1462AgL5yxldAXzm666oFpABAP3XqVHfbbbeVaDVAfuCBB7r27du7rl27ui5duri2bdu6K6+80h122GF2LHWvu+46g+kxY8a4DTbYwC1evNjdcsstdj4MheHDh7v77rvPjt9tt93csGHD3G+//eZq167tZsyY4fbcc0+38cYbu+eee86mMTt27OguuOACV6NGDTd//ny7LvUbNmzoli1b5jp06ODGjx/vtt9+e7dixQp3ySWXuCOPPNIddNBBFkazzjrruEceecQNHjw45SgI6MNu0HwHUrUvbHwF9GH6qXa8FRDQV874CugrR3ddtYAUAMqB4KVLlxa3evTo0W727NnuhRdeMGhv06aNu/vuu90zzzzjnnzySdezZ0/3999/u5EjR7pWrVq5IUOGGIBfe+21BvaPPfaY22uvvcxDf+655xrAU/bee2/HuTkHn51wwgkG8gsXLnRnnXWWa968uUH+5MmT3cMPP2znGTRokIXSTJ8+3WC+ZcuWdmz37t1dv379XM2aNd28efPsOwyCPfbYw4yQ/fbbT0BfTvehgDlM2HzXT0AfNr6qHW8FBPSVM74C+srRXVctIAUA9p9//tl98MEHxa0GqPn9pJNOcr1793ZLlixx7dq1c2ussYbFnuOh32WXXewHeOb4iy++2L3++utuu+22MwOhR48eBvkAvS8A/ahRo9z+++/v6tWrZ550wJvjXnzxRTv3Ntts4zp16uR22GEHMwDuvPNON3bsWIdXnWP5/fPPP7dQno022si8/oTitGjRwh1++OGuTp06ZiSki5GXhz7sBs13IFX7wsZXQB+mn2rHWwEBfeWMr4C+cnTXVQtIgVQhN3ThmGOOcVdccYXF1/N/YBuvObD/zjvvWCgO4S5PPfWUhcZ8+umnFj6DZ56QnWRATygOYN6gQQMD+mbNmrkNN9zQgJ7/Eyv/ww8/uH//+9/u0EMPdffff38JoH/iiSfc888/b6E7XIcFvaecckpxDD2LY/Hme6A/88wzHT+JRYtiy36TCpjLrh01810/AX3Y+Kp2vBUQ0FfO+AroK0d3XbWAFADoCa25/fbbi1tNOA2wzOcvv/yygfXQoUPNI3788cdbvPo555zjTj/9dHfUUUdZmM1bb71l8fRPP/20ATXAvt5661l4TNRDnw7oOTeQj8d9q622sth5PPL84FUnvKdPnz4Wd8+xV199tYE8aQr9othEoE82FPLQh92g+Q6kal/Y+Arow/RT7XgrIKCvnPEV0FeO7rpqASlAeAphKtHy3XffWVz6rrvu6g455BDXt29f89CfeOKJ7qGHHnJ//PGHW7VqlYXg4K1//PHHbbEsRgDhO0cccYTFx7O4FYDHy04hhCYd0OM1x8DAcKCsXLnSvPzEzeO557rEzmNA1KpVy+L4uQbXE9BX3E0nYA7TOt/1E9CHja9qx1sBAX3ljK+AvnJ011VjrAAQT3w74TUA/Prrr++WL19uPSb85dtvvy3uPaE0v/zyS9JUk+kkAs4XLFhg9bbcckv39ddf27XWXXdd9/333ye9Vmkkl4e+NGqtfmy+A6naFza+Avow/VQ73goI6CtnfAX0laO7rioF8loBAX3Y8AiY462fgD5sfFU73goI6CtnfAX0laO7rioF8loBAX3Y8Ajo461f/T32cbOLisI6WY61mzRtqvYF6Cv9AsRzzp3T5RjtFBsmYZlqC+jLJJsqSYF4KyCgDxtfAX289bv01Pq2D0VFlOnv1y71ZXT/lVqyEhWkX7z1C+td/tYW0Ofv2JR7y8ihTopFv6lRuV/QObfzzjs7sqyQjz1ZIUsLsefEhPtCTvZvvvnGcqvnspDLnY2gMvXft5nFrWSXIQVlpsKiVRaqZnNspnNl+z2x+1yX3WFTFXafJZ6fxbLpioA+W9WTHycgiLd+j11UMTD/7hcbugGP7VhqMXX/lVoyAX2YZAWlXw67mlenEtDn1XBUbGOAtmOPPdYtWrSowi5MJpj69etbjvRkhZSOEydOdA8++GDx148++qh75ZVX3PDhw3PaTnZ2ZbMnv2A11cl9m8kDf8cdd7h99903YzvYPIpzs2nUAw884CZMmGC54curkJ6STDYssAXsTzvtNNtd1hcWzo4bN86MF1JlkhHn+uuvT9kcAX3YSAmo4q2fgD7e46vnN97jG9a7/K0toM/fsSn3lnmgZ9dScqxvuumm5t0FBoE+0ieSBhHPODBNKkS862xqRO70zz77bLU27rTTTu6uu+6yfOjkQQdo+T+ebfKyk3+dY4DpwYMH2/lJCYlX/uSTT7Yc7QD9v/71L/uXH3ZgBehHjBhh7SR14++//+4GDhzopk2bZrng58+fb4ZCmzZtkuoGXGO8ALvkcR8yZIjV69atm4Et2WGoT0pJdCGPPHBMm9gYijaPHz++GOg7duxo6SBJGcm12TSKzZmoR354rgFgv/TSS65z587ur7/+cu+99577z3/+4y6//HLLfDN16lTTyM9G3Hzzze6LL76wTaL++9//Zj3+ZLd57bXXXNu2bd3SpUvNGFq2bJk7//zzi89Bykw2q+IYDJRLLrkkrWEioM9a/qQHCgjirZ+APt7jq+c33uMb1rv8rS2gz9+xKfeWeaC/9tpr3ZprrmkeZYD5q6++MngFut9//303aNAg+26zzTazjZE4np1RzzvvvKRtfOONNwzeAVzgu2fPngaRQCyhM7vssot56EePHm3A/vHHH7sBAwYY3GJEsNMpIP3JJ5+4s88+23kPPSEvwDc7rLJZ0wEHHGBQyo6pGBm0k02eEgupITEQ2OSJnVwBauD2zTffLL4mxgw7vnJ+oJ+88t5gAJhp80033VQM9DNmzHCTJ092Dz/8sIUtcW1y0vMi4HfaRH02nMJI8bnpMZZoN4bEGWec4Q488MDi5tLnHj16mJEwa9YsM3gYC3Z5bdKkSYluMXb33nuvfbbXXnuZsbXPPvvY7xgGpM3EgPGFNJfMFAD8jCPn7927d8p7TEAf9vgJCOKtn4A+3uOr5zfe4xvWu/ytLaDP37Ep95Z5oCfkhE2OANJmzZqZxxkPNiEaADO5zmfOnGmQC6gCjnwPFCcrgC4x8niD8XLjPQZi8TxzDQ/0GA8eQgFkjAXgf/PNN7cYb87Pvx7o8ZLzQ1soQD2wi+GBxxkvfrICoPPdr7/+al5xQJiNoaJAj1HRr18/MzDIEw/ssvsqHnv0SQR6DANi8AFnNoPiWIwCAJ0ZhegGUYTaMGtBvD7tYPMp+rp48WJrd2JhRgNDZvvtt7fZEeC7UaNGJQ5j11nGh8LsB8aDDwXCYMGQigL9qaeearMlGGjbbrutjelhhx0moC+np0xAECZsvusnoI/3+Ob7/af2hd1/ca0toI/ryCbpF6EggCReYh+m0aFDB/MuE/MNKAN7xFkD7D5eHCDG684fEeKyAe1bb701JdD36dPHDAOgl3MQslKvXj0LR+E7D/R+R1Sa6mO6AXp2WCUMB7gm1McDfYsWLWzRJ556X/BiswsrC3yThQD54+gfkNy8eXPrOwttMTr8rMCzzz5rUI4XG7hHp1RAT13ay6wABg5eeHZ6BejxlhNikwzoCcPBe//222+bJnjjCcNJLOiNsUB7N9hgA3fZZZdZaFK0MNNBmBKFGQji+9EcAwhNKGjtCxoxq0KYEAtjfbgPBgMzKfwkFoyCfC16oYWNjPQL009AH6af7j/pF6aAaidTQECfRJXQrCoNGza0sIl33nknr+46FkaSdQXPeePGjS28BPAESoFrPOuErADGQGF0ASj1AGDgn1h2IJKwkWQFzzKhKMSGA4p4pvFy4xG+6qqrMgI9IA/IEiJz+OGHmycazzaGxtFHH22eeYwCgPuQQw6xOPV0QA/M4yEnph3gpb+Et9x3331lAnri5YFzPOMYSZyHtrC+IBXQE8JEv+gTIA+gM2sRLYA2wE2fiLlHt5EjR1oIDeMVLaTMi8bIMwvCmPlQIDRjIS6GA8ZH//79ra3MDuy+++42W4KBlCpWXyE3YY+ugCXe+gno4z2+en7jPb5hvcvf2gL6JGOTKatKpqwlxIPjYcbDmm/lySefdIB9tWrVLI8yi1tvueUWCxUBIglFqVu3rsXJs5DSh3G0b9/eYs+JtacuMfWAYqoCYLLolOwreP75IfY9G6BHX2LO8TrjhSbmG0MDcAaK8YRXr17dFtjivWb2AK94urSWGCT0i8W0S5YsMQ94NOQmmYceg4aQGxYKJ4bcTJkyxWYLKCtXrjQD7rnnnnN77rmnGRdRDz3nAchZR4BXHwOEmH70iBa89xSMD85VmsKaBWZeKPQPowfjhdCcCy+80NYuAPjMTlBYTMzYpCoC+tKov/qxAoJ46yegj/f46vmN9/iG9S5/aycF+t122y1pBhIgivhjPLB4Swm7YMEf4IU3loV3eHcJmeD/ZOwgNhsAw2O4xRZbWPYNvLZM/ycrvXr1smwjxDDjUeR6rVu3tmsQ7kHbfvzxR4MmIC7ZsXg4fSaRuXPnOuKH8ZASZgAodu3a1byTwCyeSx9LDKQRapEO6Alj8FlLiFHG40rMNAUIA4Q90BOzDIQCcXhTk2VoSdYvdGaRIwAKTBInTlx3rgqhMMRQR3O9o/VPP/1kbcUYAeyT5SrfY489LCQFzy5e7lq1apVoFnBLTHt5Fu4/4uGj7ed6wCpgm1hoLwBL21kUmistuccXLFhgWmIkJbYn2g405dosDGbMyZKT+AzQ/tJkt0ns58Ybb2zGz7x581LKzxoE0pRmyr0voA+7gwUE8dZPQB/v8dXzG+/xDetd/tZOCvR4F5NlIGGxHjDcvXt3m7on5AAYxtMJbBN2cOONNxrIkxGEcAGAFwAGxIFlMokAingrkxVguEGDBhbOQagEKfiIe+YagAreZEI+AHMgKdmxgDUeXbyugDFhDJwX44M2EqdNPDeQjcea32kT7aWPGBGp8p6TztFnLcGjyrWGDh1qISJkLUEPjBHahtd26623Nm8pfUiWoYUwlsR+AYcstsSzihFA39Ez3wrjT7+jBdAuz3zr6TTAEPLGVfQ4wJm49couzILwfLz77ru2QDWfi4A+bHQEBPHWT0Af7/HV8xvv8Q3rXf7WTgn0yTKQAMnENBOGQawwcdR4ugFOwjMAOeKTAXdAl5AMAPyiiy4y7yXhE8AqdX12k0Rp0gE9nnY87niAyedN+EQyoOd7QiTwhBK+8dFHHxmEJwI9YRcs/APCOZ4fQksA+3QbGfmsJXhEOR5gpxDWQb/pI/HaLG6kzSzixAhJlqHlnnvusWOi/SJdI55mwiWIa6cfKoWvAAYeMxqkosz3IqAPGyEBQbz1u/TU+hayWBFl+vu1S30Z3X+llqxEBekXb/3Cepe/tVMCfbIMJMAzXmgAnvAMYN4DPYsVgXZipwmbAUY90OOVJxTHhzqQypBY6mQlCvQYDuQr9x56fw0W9BGKQtiAB/rosXjR8b7jESUFIt5+PNwe6K+55hqLdcYDjpcd4GYBK7MPpQF6QjjICsMiSQrnJ7QD7zxhP99//72FsDBDwSLKZBlayLme2C9SGRLOQlsILyGDCuOhIgUqSgEBfZjSVQ0Ijtn2uzDBEmo3adrEzS6qGGAuS8OrNz3HTZo0qSxVK6ROVbv/ci2q9AtTNN/1C+td/tYuFdDjSQZQ8R4DmcB0NkDP4LIojzzf1GU3T+/VTpSGGHVmAcjEQnw7KQFTAT2L/5Idi0eb6+DpJi6exZPMCOAZZbaAUBy8+3xO2kLCbzie8CDaxexEJg89Mfnka8fgALzxvKIJXn+ymAD1pH5kESeATghNsgwtzHokAj3ZUggRIezphhtusNAj1iLkurAOAYMmUzx1Lq+78847WzYYQo2SFUKemMmJxqOHZh1K1X7Gikwymfrv24zhR5gRMzGZCsZby5Ytszo207my/Z4ZIa6LsZuqoD0Lm1kjka4I6LNVPflx+f5Cy3X7ztvtK9d9l9yt8wlTv/xr31TtRgF9gMy5vv8CmpK0qtoXpmi+6xfWu/ytXSqgZ0EfmTmAYRamAq2EpxAPnMpDT8o9AIPYYWKc+SGWnNj1ZAWPNCBLOAuzAHjbE4GeOHjOyb/JjiXkh5AXZgKAHL+rKG3ndxYeAjRkoQHOSCHIQl6+A9p8VhXAPlmJZi0hpAhYJ/MLgI83nlkGzoGRwDnw5JPvPVmGFmY7vHa+X6QeZBaBeHT0YqEtnv9cF7+xFIskK6qwdoKFzBhCyQqzHOjE2glfMmUdKmvbCc0iWw8pNdMV32YMxWgqz3R1MMI4N89GpqxIZW1/tB4Za6L3PBlvWNfiC88umq6zzjr2EetcMDxTFQF92Kjk+wst1+0T0IfdL7munevxVftyrUDY+TS+YfrFtXap01biAQQO8HYD3cAx4JxNIcyF0BtAHU8n+bATCzHl3lggK0g2xWcQiR7LYtR1113XvPS+AMcs4ox6MIF4gByvMFlG6BuAlyljSvSaxMzj9UyVuSfarlQZWhL7ic7E3BOnmSzbTDa6ZDrGAz2Qh+FBNiK0AQYxpAjzwdghHSQbPLHAGA8vszRkCEq2kRNtJu0innZmPwBa/s94Y+AwK8IxwDR50zk/WqI/YUYe6EnrCNjzw0JXZkxYh5AsUxDpMTGmMBSY/UlWgGuyEjHehD8xe0I9wrEwMLlXqE+aSnRh1ob7mjaRy502MyvjgZ4sNSy2Ji6eaxN2RfYm6pExiGsA2KSo9FmR2ESKtR8+A5Pf3MnPRmD8AtqElJUm243fJIxZLZ4ZwJ1sUtE89YS48bwxJmhw3nnnmTFMhp5kRUCf6elJ/31Ve+EK6MPul1zXrmr3n/TLtQJh58v3+y+sd/lbu9RAn6uu+PjzxPMBfNkaCLlqS7Lz5HvGlFz03QM9MwAYJEAvwIyxBrwC3e+//75tJsV3zDow+8HxGC9AYbJCXnjgHcBl0TFZhPByYywReuV3ik2WSQkjgnUFgDTGGLMo3kPP7EmyTEEsxMbIoJ2EMCUWFm9jIBBSRcpOgJrdbqN56DFmWOvB+QHevn37uoEDB9rMCMCcmIfeEOZKxgAAIABJREFUb+DEBlqELXHtXXfd1XbT5XfaRH1CxnxWJIwXjCU2lMKQYD0K6zx8oc94zTESZs2aZQYPY0Ha0iZNmpToFmNHNiYKi8wxtvxCcwwDjFQMGF8wsgiRQwuMBQxnUsmmKgL6sCcs319ouW6fgD7sfsl17VyPr9qXawXCzqfxDdMvrrUrDejjKmgh9csDPTMSxOgDpM2aNTOPMx5sgI/Fxnhx2RgKyAVUAUe+B4qTFTLzECOPxxjjjIXSQCyeZ67hgT5ZJiXgn4XMzEpwfv71QI+XPFmmIAwP1ingxU9WAHS+I4QJrzggTMhVFOjJVc8aD2YEWLxN6lFCoPDYo08i0GMYEIMPOJPmlWMxCgB0ZhSiG0v5rEjE69MOUr5iGC1evNjanViY0cCQYTaH2RHgvlGjRiUOY9G5B3JmPzAe/CZgGCwYUlGgf/zxx20fCNaQsCaBFwLHp5r9EdCHPclV7YUroA+7X3Jdu6rdf9Iv1wqEnS/f77+w3uVvbQF9/o5NzltGKAggiZfYh2kQ2493mZhvoJ2sPSwSBdh9eAlAjNedh5S4bECbTcVSAT0LmzEMgF7OQchKvXr1LByF7zzQJ8ukBNCznoEwHOCaUB8P9CzATpYpCFhlgW+yECAvIv0DklmPQd+BWowOQlBoR7KdYlMBPXXJ4MSsAAYOXngWRAP0eMsJsUkG9ITh4L1nwTOa4I0nDCexoDfGAu0lRIzdcPHuRwszHYQpUfC6E9+P5gA6nn0KWvtCHD+eefaQoGDMMMOCYcBMCj+JBaMgX0u+vzCqWvsE9Pn1pFS1+y/X6ku/MEXzXb+w3uVvbQF9/o5NzltGrD+LgPGcN27c2MJLAE+gFKjFs07ICmAMFEYXgFIPAAb+iWUHIgkbSVbwLBOKQmw4oIhnGi83+f6J5c4E9IA8IEuIDFmM8ETj2cbQSJYpiDj1dEAPzBNyQkw7wEt/CW9hF9+yAD3x8sA5nnGMJM4D/LO+IBXQE8JEv+gTIA+gM2sRLbVr17a4fFKe/vXXX6Ybi84JoWG8ooW1FdEYeWZBGDMfCoRmEyZMMMMB4wN4R0vWGJC1h7HFIFMMfc4fMzthvr/Qct0+AX353EdlPWuux7es7UhVT+0LU1T6hekX19oC+riObIp+kV0IsCcrD1DI4lYy9RAqAkQSilK3bl2Lk2ezMB/GQWpOQjmItacuMfWAYqoCYLLolOwreP75IctQNkCPN5mdgPE644X2WYcA52SZgpg9wCtOeEqqgkFCv8jQRLpTPODRkJtkHnqgl5AbFgonhtyQDpXZAsrKlSst7p1NxVj4jXER9dBHsyLh1ccAIaYfPaIF7z0F44NzlaawZoGZFwr9I7MTxgseePZbYHdaAJ/F4j6MiVmWVEUhN6VRf/Vjq9oLV0Afdr/kunZVu/+kX64VCDtfvt9/Yb3L39oC+vwdm3JrGaEweGajud7xNJN9COAk3AWwTxZfzWJm4s0J38DLTf79aAFuiWkvz5IqU1CmzES0nSwwfoOz0DbuuOOObsGCBaYlRlJUz8Rz+6xILAzGUCFLTmJWJNpfmuw2idcg6xTGz7x581J2jXaiQSrPvK8ooA+7O/L9hZbr9gnow+6XXNfO9fiqfblWIOx8Gt8w/eJaW0Af15GtgH4RckJoTLSw8JRFoJVR8j0zEbMg7MeAt5x0nvlcBPRho1PVXrgC+rD7Jde1q9r9J/1yrUDY+fL9/gvrXf7WFtDn79ioZTFTgLAcZjRIRZnvRUAfNkL5/kLLdfuO2Tb9zsOlVbNJ0yZudtHs0larsOOrNz1HO8UGqJ3r+y+gKUmrqn1hiua7fmG9y9/aAvr8HRu1TApUmgIC+jDp8/2Flq59beotdzXW/DtMgMDa7LvAGp98LdWbCehDxqaQn4+QfueqrvTLlZLxOo+APl7jWZC9IU0judnJ0OLjyolPJ+sLG1sR2sOi1tIUFqayyJTMOIVcWHjbsmXLUvc/tM8C+jAFC/mFC9Bf13hBmAAxr31TjRvloQ8Y40J+PgK6nbOq0i9nUsbqRAL6WA1nYXaGBaLkWic/u8+HziZPpIC84YYbSqTPTNdDssawsHTAgAGWwYYUl+kWqhaCWqTcJKNQRcfcC+jD7o5CfuEK6DOPvYA+s0bpjijk5yOs57mpLf1yo2PcziKgj9uIFmB/yH9PqkU2MvK7l2YCetJhsiMrhRSP/JCznZSabEbFBlVffPGFq1+/vvv+++8tlSSZezAYTj31VDsOrz8Gw5gxY9xaa61lu9iycNVnmmGWgO9Y+MuurswisEEWn5FznoxApP2sWbOmGzZsmM0GMKvATAPpK8k6Q7v69+/vSMXJNQF0zk++eRbH8h35+SmkwmTjr2h72FyK/QJIGwrYs/srMxrkwWfzL59OlPazfwB7A5Dnnhz3ZLRZZ511bGMuctknq5/qdhHQhz1IhfzCFdBnHnsBfWaNBPRhGkm/8tMvrmcW0Md1ZAukX4DxtGnTbDMl4BcwBvDTAT07sk6dOtUNHTrUYPuMM86wzbIAXzz0eLPJkb9o0SIDWfKts9HV2LFjLR8+1wDCgWXytzM7QNpOQJoNoHwhfzwLWcnJz3e0i9heUkMC3nxOiBB1Ro8e7cjxD9SfcMIJ1hd21QX42Z3WZ/7hmldccYUZEMwoUAfjgxShGCStWrVy9957b3F7PvzwQ9v0i02xyF3PxlSkHWVPADaGop/8kAqTmQn61rBhQ8tDT7/Y1ZYNpVLVF9CXz4MioC8fXfPlrAL6sJEo5OcjrOe5qS39cqNj3M4ioI/biBZYfwB3PO1senTMMcc40l7yxyod0ONlxzMNtFLwtAPQO+20kwE4QE/IDZtYYSwA3MwA8Fnfvn2LN23CMw5Q77PPPu6jjz6yjbSiBfjHq80OuXjW8XZjFHAOdpulHl7yXr16uVGjRhk848HHew5s0xZAm5kHgJ5rAe0A9z333GPH8B3tYOMqfugXx/j2+A2qWrRoYbvlog/GDNBPex5//HHXrFkzazazFswqsBkXOffRkRkDjueYxPrMWAjoy+eBKeQXrjz0me8JAX1mjeRhDtNI+pWffnE9s4A+riNbIP0CeNnkiB1cCUnZfPPNXevWrc1zniqGHk/8tttu67p06WK9fPrppw3cge4o0PsY+nHjxrlly5YV7xDLLra+sEPseeedZ7BLqI4vhLWwAy3XAJAJnwGgn3nmGQNlvO8AOd5/gB4vOr83aNDAgB6A3nDDDUsAPYYFO93SLwwD1g6wcPeDDz5w77zzjuvatWsx0Pv2RHecpX94+PH4MxOBl37gwIF2XQrhOxg1GAOAPeE3LKrFs4/hkFgfA4SZAQwkv3YhettQJ19LIQNzPmiaKcuNFsWmHyUBfdhdrOdX+oUpoNrJFBDQ676oNAWIM7/rrrvMs+3j1gkfmTRpklt33XVTAj2wTGgJ3mzyut9///2uU6dO7qKLLnLrrbeehblEF8V6oL/99tstbKZNmzZmRADTADkx54lAjyh4/t966y0Lr8FomD59uoUG4bEfNGiQGzFihEE7580G6NlVlrYRt49BwjWPP/54d9BBB5kBQzgNsw6cLxHo2ZUXrQidAcKZIcAAwFPPjAS799If+odRlAj0XCexPuf85JNPko6/YujDHotCBhZ56DOPvYA+s0byMIdpJP3KT7+4nllAH9eRLYB+AcTEoxNz7gshLI0aNXIvvfSSLSxlZ1Vi2VksGy14unfYYQfz6s+fP9917tzZAJfFpsA1/0Y99ISXEMZDLHzjxo1tYem3337rOnToYAbEyy+/bCAdLcSeE4aDN/znn382mCY8Bs/4mmuuadcmJIcwmGyAHvhftWqV1b3wwgut3RgNf/zxh31Om/DWo4lvT9RDz7F169a12QxCiDBiaAt9oC3E79NnZhySeeiT1U91mwjowx4gAX2YfvleW0AfNkKF/HyE9Tw3taVfbnSM21kE9HEb0SrUH7K4AMc+dz1dB5p/+eUX88CnKnXq1LEsNam804n1gGjgP1qIUccr7mcWMslODP2dd97p5syZ4/DU+/YB8RgmhPVgOKy//vpu+fLlKU/HdZcuXVqiPcxm4PEn1AbvfbqSrH6y4wX0mUY0/feF/MKVhz7z2AvoM2uU7ohCfj7Cep6b2tIvNzrG7SwC+riNqPqTlwp4oCckphCKgD5slAr5hSugzzz2AvrMGgnowzSSfuWnX1zPLKCP68iqX3mlANlmfvzxx6w9+pXdeAF92AgUOtDXWDP9TE+YOplrkx529uzZmQ+spCOqNztHO8UGaF/Iz0dAt3NWVfrlTMpYnUhAH6vhVGekQG4UYI1C9erVc3OycjjL3Llzba1Fvha1L2xkpJ/0C1MgrLbuv3D9yLqmUrEKCOgrVm9dTQpIASkgBaSAFJACUkAK5FQBAX1O5dTJpIAUkAJSQApIASkgBaRAxSogoK9YvXU1KSAFpIAUkAJSQApIASmQUwUE9DmVUyeTAlJACkgBKSAFpIAUkAIVq4CAvmL11tWkgBSQAlJACkgBKSAFpEBOFRDQ51ROnUwKSAEpIAWkgBSQAlJAClSsAgL6itVbV5MCUkAKSAEpIAWkgBSQAjlVQECfUzl1MikgBaSAFJACUkAKSAEpULEKCOgrVm9dTQpIASkgBaSAFJACUkAK5FQBAX1O5dTJpIAUkAJSQApIASkgBaRAxSogoK9YvXU1KSAFpIAUkAJSQApIASmQUwUE9DmVUyeTAlJACkgBKSAFpIAUkAIVq4CAvmL11tWkgBSQAlJACkgBKSAFpEBOFRDQ51ROnUwKSAEpIAWkgBSQAlJAClSsAgL6itVbV5MCUkAKSAEpIAWkgBSQAjlVQECfUzl1MikgBaSAFJACUkAKSAEpULEKCOgrVm9dTQpIASkgBaSAFJACUkAK5FQBAX1O5dTJpIAUkAJSQApIASkgBaRAxSogoK9YvXU1KSAFpIAUkAJSQApIASmQUwUE9DmVUyeTAlJACkgBKSAFpIAUkAIVq4CAvmL11tWkgBSQAlJACkgBKSAFpEBOFRDQ51ROnUwKxEOBiRMnuurVq+dtZ+bOnesaNWqk9pVRAelXRuH+V036Sb8wBcJqF8L9179//7BOqnapFRDQl1oyVahqChx88MGubt26Jbq9aNEiN3PmzIKRAvjdZJNN3KxZs7Jqc1FRkWvatGlWx1bGQccdd5ybNGlSZVw6q2uqfVnJlPIg6Sf9whQIq637L976hfUuf2sL6PN3bNSyPFFgypQprlq1au7LL78sbtGcOXPciBEjglp46aWXurXXXtsNGDAg6Dy+8quvvuowPv7888/VznfVVVe5HXfc0XXr1i2rawnos5JJQBomk/STfuWkQNhpBfTx1i+sd/lbW0Cfv2OjluWJAgD91KlT3W233VaiRQD5gQce6Nq3b++6du3qunTp4tq2beuuvPJKd9hhh9mx1L3uuusMpseMGeM22GADt3jxYnfLLbfY+TAUhg8f7u677z47frfddnPDhg1zv/32m6tdu7abMWOG23PPPd3GG2/snnvuOcc0ZseOHd0FF1zgatSo4ebPn2/XpX7Dhg3dsmXLXIcOHdz48ePd9ttv71asWOEuueQSd+SRR7qDDjrIwmjWWWcd98gjj7jBgwenVFhAH3bzCQikX5gCYbV1/0m/MAXCauf7/RfWu/ytLaDP37FRy/JEAaAcCF66dGlxi0aPHu1mz57tXnjhBYP2Nm3auLvvvts988wz7sknn3Q9e/Z0f//9txs5cqRr1aqVGzJkiAH4tddea2D/2GOPub322ss89Oeee64BPGXvvfd2nJtz8NkJJ5xgIL9w4UJ31llnuebNmxvkT5482T388MN2nkGDBlkozfTp0w3mW7Zsacd2797d9evXz9WsWdPNmzfPvsMg2GOPPcwI2W+//QT05XSP5fsLTe0LG3jpJ/3CFAirrfsvTL+41hbQx3Vk1a+cKQCw//zzz+6DDz4oPidAze8nnXSS6927t1uyZIlr166dW2ONNSz2HA/9LrvsYj/AM8dffPHF7vXXX3fbbbedGQg9evQwyAfofQHoR40a5fbff39Xr14986QD3hz34osv2rm32WYb16lTJ7fDDjuYAXDnnXe6sWPHOrzqHMvvn3/+uYXybLTRRub1JxSnRYsW7vDDD3d16tQxIyFdjLw89GG3j1640i9MgbDauv+kX5gC/9/emUDbWL1/fJcmU0QytBQNlLGoJDSpqGQhc1qJskgZooEWqYhEZUjDIhKiMpYy00TxzzUUkqnBUKFICqn/+jy/te96Hefce6793nv2OffZa92lzn333s/+7v2e+32e/d3Pdqvt+/pzG52/tZXQ+zs3apknCMSS3GBe48aNzRNPPCH6ev4bsk3UHLKflpYmUhzkLrNmzRJpzMaNG0U+Q2QeyU40Qo8UB2JepkwZIfRXXXWVOfPMM4XQ899o5fft22eWLVtmbr75ZjNhwoRjCP2MGTPMvHnzRLpDPxzoveeee9I19ByOJZpvCX2HDh0MP5FFD8We+AL0/Q+a2nfic0tNxU/xc0PArbauPzf8UrW2Evo4ZhZdNBHLoOQijmr6SIogAKFHWjNixIj0ESGngSzz+ZIlS4RYv/jiixIRb968uejVO3XqZNq3b28aNmwoMpsVK1aInv79998XQg1hz58/v8hjghH6jAg9bUPyibife+65op0nIs8PaxR5T48ePUR3z7N9+/YVIk+aM3soNpLQR5smjdC7LV79g6v4uSHgVlvXn+LnhoBbbd/Xn9vo/K2d8oSeQ4tIGlq2bHnCswBpGzx4sEQ9teQ+BJCnIFMJlt27d4suvWLFiuamm24yjzzyiEToWWeTJ082R44cMf/9959IcIjWT58+XQ7L4gQg36lfv77o4zncCoEnyk5BQpMRoSdqjoOB40D5448/JMqPbp7IPf2inceBKFq0qOj46YP+lNDn3Nr1/Q+a2ue2FhQ/xc8NAbfauv7c8EvV2t4T+okTJ0omEHTHhw8fFnKyatUq0SX37t3bnHrqqWbz5s2Sjo+sIBAZJAZk90BmgPwBErV06VKJsAYzjUC+0CnzGVFLZAxES2mPSCpR1wMHDph8+fLJYcKVK1eaMWPGmJIlS0o2EWQKwVSGwUVCNpHx48dLphEONw4ZMkQiszgHZCY5//zzRXONTZC0Q4cOmQEDBpi5c+dGzWKSqgswFccFiUffjryGtVegQAGzd+9eGSprc9euXenDRkpz8ODBqKkmM8IGcr5161apV6pUKbNjxw7pi7X6+++/R+0rK1hrhD4raB3/rP7BVfzcEHCrretP8XNDwK227+vPbXT+1vae0NuoeMeOHUWrDIGHvBON5AddMYf/0AtXq1ZNiFS3bt2EKC9fvlyIDocP0TETZQ9mGuGgYvXq1U2xYsWEwJOBBIJOSkEurSGdYPHixSXqivOAjIIDhjyLLUQ/icpGK+3atRP9NNFS6iOR4KDkl19+KY4DmUkgfv379xctNbKMOnXqmJo1a5qPP/74uCwmZE/RogjkFAJK6N2Q9v0Pmtqn8+uGgFttXX+KnxsCWjsaAklB6EnhR/o/8myPHTtWIthIGDggSLE64cWLF4tuGa0yZB2yTySfLCQQam72jMw0QnYQ5BIcMLz66qslvSCSCbKBEEGnkJmEPiDeOAhEXomKknaQOtEK0VLagKDzHJFYsoxA6MkLDmnnYqJy5cql3zgKqWdXARlFtCwmuoQVgZxCQAm9G9JKWBQ/NwTcauv6U/zcEHCr7fv6cxudv7WTgtBzSc6kSZOEGEPAIeUDBw5Mz6ONHAZiTArBNm3aCIlH6oIuGfkMhB6dM2Q6mGmE1IFEvll85PkmswjSGMh22bJl03X3OALsAvBDe1YygVSCGzijFeQ49913n+wSINuBzFtCj520w+FGtNDBdIg4FOQXj5bFxN9lpJalGgJK6N1mNOw/aI1L7HYzKKI2O5Pco+Br8d2+U+p0kl1cX0vY6y/scap9bogqfm74pWrtpCD0aIXRtkPiIdoQYqLmHHbdsGGD6OSJyHOgEJ07F/PwLLp7soAQEedwLKkDIzONQPRpCwkMEXMi6dSHWHMRD3KcN998UyQ3OAs7d+6Uy3rYCeAyIaLq0QoOCKSc53AsyGgSSegh/Y0aNZI2yFdOphKcALT30bKYpOoiRHqELIkLk+yZBDTinIlYu3atYMHcZaWwRlgX9sKmrNT16VkcPg68ZnX8rmNQQu+GYNh/cP+vtr/k2w2p5Kz9XMlBSugdpi7s98PBlKhV1T43RH3Hz210/tZOCkIPySZjSJ48eUQfzwFXCDaEmwOzkDaIMRH2J5980vz1119yxT2aeNL1Qewg7UTjIzONoGunLdqmLaL/aPOJnuM80O/Ro0elXbKTDBo0SNrmB/07cqBopUmTJiLR4bDr/v37JUsKsiEIfqtWreTiH/KRQ/yLFCki7ZHKsFevXjGzmPi7jNwsQ0LFuHHGbD50nCZ2ZHDMXnvtNZEuZVbAG6kTOynsjLAmkEglc8EpZU0GL5/KifEooXdDOew/aEro3eYj7NpK6N0QDfv9cLPm+NpqnxuivuPnNjp/aycFoSdyvXr16vSsHhZOMnpAutevXy8HVClENJHfsJ1sP4PkQZiRyFAiM43wWZUqVSTaTyYdWyBT5J5HMhMs5PhGMsPnRI8rV6583Ayj9cdBIMPJ9u3bhbzTtrUhWAF5EE5IkHxGy2Li7zJys4zdFW5aJdsPxJ15y4zQc6bBnnEgrSQ/OExgzmHmhx56yHz//feSTYisL0TsSTWJw9C2bVt5jqg3DgNZjjhsTQpKdnLsGmAO4smKlDdvXjmUjWPJrgI7DawR5hy7kITFytbE73AqKaSj5MB10B4Og+M4IiOD2OMosqOBw8kuUrRsTxwC54A15zyQlU2dOlUOhEerH2vmlNC7remw/6ApoXebj7BrK6F3QzTs98PNGiX0uQ2/sMfrS3tJQ+ghfT4WHAGcishCtD0aefdxDIm0iXShpOpE+gT5hRgz1xkRenY05syZIxc5FSxYUGRKXP4F8cV5I5rNLssPP/wgRJZzEZxL4PwF5yHoAxIOWWbHhN0BLmmCSHNewxZ2BuLJikQdZFrs1kDqyS/PWDiXgVSsRo0a6XcYBLM1saNAHZwPnBgcEi6GQnJl7fnmm28kLz155BcuXCgXVXEbLbtK7EhFy/bE4XFkXIyrVq1acjg7Vn0l9Nmz+sMmLEros2eeTrRVJfQnitz/6oX9frhZo4Q+t+EX9nh9ac97Qk/EE8lKMHLuC3hqhzsCEHci7ZxTIAUoOxV82WdE6ImyE5m25xeItEOg2ZmBgEPokdxwYBlnAcLNDgCfkdGIZylExiHUZCpilwc5VrDEmxWpa9euZtSoUUKeudeA6DlkG1vItMTOA+lXI7M18Qy/ww7OUPDDuCD21h570RTnLzisDT44M5B+ou/Rsj2RhQlHExx5f3iejFCR9dmxUELvvoajtRA2YVFCnz3zdKKtKqE/UeSU0Lshp/iFgV+qtuE9oU9V4HVc/0MAwsvlSJw1QAZD3v+6detK5DyWhp5IPLsiZCyicGEXxB2JVJDQWw09l4FxERiknnSgnMGwZfbs2aZz585CdpHq2IKsJd6sSBD6zG53hdBHZmvi7ADnO8hylJaWJvcWWEJv7QneHMv4iPAT8Wcngig9l5HhGFBstiecAXsrLBI0Ivs4DpH1cUDYGcBBsmcXguuSOr6WsAlz2OMM2z4l9GHPkFt7Sujd8Av7/XCz5vjaap8bor7j5zY6f2srofd3blLeMnTmo0ePlsi23YFBQkI6OM5HxCL0RJuRzUB+ixYtKlmEIOrdu3c3+fPnF5lL8FCsJfRcNoach+xEOBGQaeQ4aM4jCT3gx5sViXbjIfSR2Zros3nz5nJhGQ4Mchp2HWgvktCTYQmskM5Awrk3AQeASH1ktiecokhCTz+R9Wnz22+/jbrOVEPv9vqF/QdNCb3bfIRdWwm9G6Jhvx9u1iihz234hT1eX9pTQu/LTORCO0gTSlpQNOe2cNCzQoUKhkvCIPxkFYqW5YbsQNwKTFR/06ZNkjkIgtulSxch1/wbjNAjL0HGg07+8ssvl4Ol3CdApiQciCVLlgiRDha05/FkRUIGEw+hj8zWhN04DUeOHJFsSthEtB5MrD3BCD3PcqCb3Qy72xAt2xM7DtEi9NHqx1p2SujdXsiwCYsSerf5CLu2Eno3RMN+P9ysUUKf2/ALe7y+tKeE3peZUDuyjABZXEg3anPX0wCkmcPIROBjFVKIchg3VnQ6sl68WZEyGgCSm2jZmiDxOCbo3pHRkBVp7969MZtCG0/mJXu5GQ9Gy/YUq4Fo9aM9q4Q+y8vxmAphExYl9G7zEXZtJfRuiIb9frhZo4Q+t+EX9nh9aU8JvS8zoXakNAKW0PuarSkSfCX0bssxbMKihN5tPsKurYTeDdGw3w83a5TQ5zb8wh6vL+0pofdlJtSOlEYg2bI1KaF3W45hE5bGJWJnJDoRS6tXry53dfhafLfvlDqd9KZYh8UT9vvhYErUqmqfG6K+4+c2On9rK6H3d27UMkUgYQhwRoHL2Hwt3ADNWQtfi9rnNjOKn+LnhoBbbV1/7viRdU1LziKghD5n8dbeFIGkQICDyERJtSgCioAioAgoAllBgN0/ss1pyVkElNDnLN7amyKQFAj4LrlR+9yWkeKn+Lkh4FZb15/i54aA1o6GgBJ6XReKgCJwHAL6B9dtUSh+ip8bAm61df0pfm4IuNX2ff25jc7f2kro/Z0btUwRSBgCvn8hq31uS0PxU/zcEHCrretP8XNDQGtrhF7XgCKgCMSFgP7BjQummA8pfoqfGwJutXX9KX5uCLjV9n39uY3O39oaofd3btQyRSBhCHTo0EEuwvK1qH1uM6P4KX5uCLjV1vUH8MXHAAATS0lEQVSn+LkhoLU1Qq9rQBFQBBQBRUARUAQUAUVAEUgxBDRCn2ITqsNRBBQBRUARUAQUAUVAEchdCCihz13zraNVBEJD4JxzzjG//PLLMe2dddZZZt++febff/8NrZ/MGopmR2Z1cvL3pUuXNr/++qv5+++/c7LbLPVVrlw5s3HjxizVyamHwe/HH3/Mqe7i6oebn3fvDvf23rg6jvMhHzGLNN33NefzO5vo77wiRYqY33///Zjv+UR898f5OuSax5TQ55qp1oEqAuEgcMUVV5jbb7/d1KtXz1xzzTXS6HnnnWfGjRsnX/D58uUzH374oenfv384HcZopUGDBuahhx4Sopw/f37zwgsvSL++lCpVqpiRI0ea/fv3CyZTp041L7/8si/mpduBnpmfq666KkcdscyAaN26tczvnj17zGmnnWamT59uXnnllcyqZevv69evb/r27WsOHDhgChQoIGvcpzXnI2bRJsTXNef7O5vo77wLLrjA1K5dW97LFi1amC1btiTkuz9bX/IkblwJfRJPnpquCCQCgREjRpgyZcqYokWLphN6PoNUt2vXzlx44YVmypQppm7duhKtz64CwVu/fr3p3bu3GT58uCFq1bJly+zqLsvtzpw503zyySdm6NChhohpnTp1zKRJk7LcTnZWqFq1qpDSkiVLekfoFy9ebN544w3z1ltvmebNm5vu3bubmjVrZiccmba9aNEiM378eHFeu3TpYho1amRuvPHGTOvl1AM+YhY5dp/XnO/vbKK/81jzNWrUMOXLl5d3EkKfiO/+nHqfkq0fJfTJNmNqryLgAQKQGIigjdBPmzbNLFu2zDz//PPm5JNPNsuXLzf333+/SUtLyzZriUZC8nbu3CmE9JlnnjGzZs3Ktv6y2jBknkhusWLFzG+//Wb69etnli5dmtVmsu15HLD333/fPPjgg0JSfYvQI2thW/+ff/4xEydOlCh9s2bNsg2PeBr+8ssvxWn95ptvzC233CLvALj5UnzELIiN72vO93fWl+880lJaQp+I735f3jff7FBC79uMqD2KgAcIICe4++67j7ME4vfnn39KVDJI6JEdzJ071wwbNkzqWEK/evVq59G0adPGFCxY8Jh2Fi5caHr27GkqVapk1qxZY4j64VA8/PDDzv1ltYFY9k2YMEF01o8//rh55JFHZAejSZMmWW3e+flY9uFgbNiwwTz99NOGP9CJIvSx7EPTzxY/MqVChQqZjh07ylwnsqxYscJAqr777jtz+eWXm1dffVUilj4V3zALYsMOlQ9rLtZ88b3lwzsbyz5SCfvwnRck9Nn53e/Te5UMtiihT4ZZUhsVgRxGoESJEmbMmDHH9XrvvffKQdhIQg+xIRoNyT7llFOEXEN0wjgcS3S2cOHCx9hCf3369DEDBw4UbTUki0iz3THISbhi2derVy8zevRokY1Uq1ZNyF8iornR7MMxw8n466+/BKq8efPKf3Mu4uDBgzkJn0Tfo83v1q1bBT+cwh49esj6SnT5+OOPZc3NmTPH3HbbbaZbt24SqfelQPZ8w8xiw6HJ+fPne7HmYs0XO2g+vLMZORw+fOcFCX12fvf78l4lix1K6JNlptRORcAjBCIJfadOnUQOweeQHCLR1157bbZavGDBAtkVQObz2GOPmeuuu05Ili+FrWgkI0g0IPfovxs2bOiFeciiONxsy6hRo+SgG5KSMJywMAb5wQcfSOadROy6xLKfXRfmlDWOU4QT1L59+zCGG0obPmJmB5YMa87ndxYcffnOCxL6RHz3h/KypGAjSuhTcFJ1SIpAdiMQSejPPPNMw4EyZCUnnXSSyDjQZ2dnadWqlRArCOh///0nxJ5ovS8FScaQIUMkG8qhQ4dEojRv3jxfzDvGjkRKbjKKRkICbWGeE7HDEbTP7rTw2dGjR03Tpk3N9u3bvZlTJCO+YRYLHB/XnO/vrC/fecwda3/btm0mEd/93rxwnhmihN6zCVFzFIFkRqBixYpm8+bNOZZzHfJy6aWXyiFFXwspPX/44QdfzVO7sogAh3PJob5u3TpvdjOyOAR9PBMEfH5nff3Oy+nvfl3ExyOghF5XhSKgCCgCioAioAgoAoqAIpDECCihT+LJU9MVAUVAEVAEFAFFQBFQBBQBJfS6BhQBRUARUAQUAUVAEVAEFIEkRkAJfRJPnpquCCgCioAioAgoAoqAIqAIKKHXNaAIKAKKgCKgCCgCioAioAgkMQJK6JN48tR0RUARSG4EzjnnHLmIa8eOHd4NhHR0lP3793tnW2436IwzzjD8kBM/VQvvBilft2zZEvcQeZ6Uul988YVcgBdZzj33XPnIp1SjcQ9OH1QEMkFACb0uEUVAEVAEchgBLnUaOnSo5O2n/PPPP2b48OFm0qRJOWxJ7O64NZM7BbgQK7sKpI2r4zds2GDatGkTWjeffvqpyZMnT+g3B3fo0MFUrVrVdO7cOUNb6Z+7EbLrcrU333zTkCawefPmGRLejOzgxlEcgueee+6EcA8DY2yoU6eOpLlduHCh2MJ9A6Rm5Fbe7777Ti5mi7dw+3GLFi1M48aNzY8//nhctb59+5oGDRrI7b6p7AzFi5c+l1oIKKFPrfnU0SgCioDnCJDHfMmSJYZ/icxD5sl7TenYsaPh0hYfyuDBg4UU9+jRI9vMsYT+22+/NXfddVdo/Xz22WdCCq+55prQ2qSh9957z5QpU0YuuMroRl2cIS6egqxmR8GxwNFibvbu3Ruzi4zs4FbgP//8UyLaJ1JcMW7durXcAgxO/PA+sObeeecduTAOB4/bi5ctWxa3eYsWLZLbe2+//faodc4++2wzZ84caZO2tSgCqYSAEvpUmk0diyKgCHiPABHHBx54QIgYkUIK/3/33Xebjz76SG7ZfeaZZ8zNN98scpyDBw+aZ599VogIUfzLLrtMorJcqLVnzx4DiWnSpIm088ILLwjp5EZaiA2kFhIDeXvqqafMb7/9Jm1df/310vbu3buFPBEhnThxotm4caO54IILpK9atWoJoYcc3XbbbebBBx80xYoVEwnOa6+9JsQLWc6rr75qLrroIonm//rrr/Ic9s2aNUs+o0DckTkw9mBkNJLQE3G+5557pB/a4lp5sMEWorcffPCBYMCNnvwub968Yv/FF18s42Vsc+fONbHIZrz4YQfjQNZy4MABGSM4tm3bVjC1cxd87ueff5bbij/55BMDkeY58KYeOLdv3/4YaRVtEmVnvLfeeqtp2bKlmTJlipk8ebLceMxlaayFRx991NSoUUMi/mA6aNAg6efqq6+WaPSuXbvMqFGjBJPDhw/LJWY4iA0bNjSzZ8+OageRanaJaDMtLc3gIGS1BDGuUKGCeemll8xZZ50lbWI788O6YjcDZ4219PXXX8ulXDzL+sYGxnDkyBEzbdo0s3XrVtOsWTPBEPxq164d95qvUqWKeeONNwQL2q1cubKsXdbrggULZC5omxusS5QoIZhm5JRlFQ99XhFINAJK6BM9A9q/IqAI5CoEIN1IMSDvffr0OW7sRJUhnhBUiFH16tUlik9E9t133zVly5YVQnn66aeLZAcCBYG+8MILpQ5RYQg8BApngJIvXz4zc+ZMIU3INfbt22eWL18upGrTpk0GgmflPlb+A5GmDcj0559/Lu3gLNx5551C1K+77jrTu3dvkTDs3LlTiFP58uXlpmBIGqSWqKuNIBcpUkQIK2TUliChJ2KKI4LNED+i4NiJNAliu2bNGnEI0EdDxMBp8eLFggHjvfLKK8VenCRIW7QIfTz4MS5sp0AEb7rpJsEYYj1gwADBEhwh1kR66RNHCKcCe3nejh1yjX1FixYVkkpE2pbHH3/cNG3a1Lz++usSJccpgtBOnTrV9OzZUxwm5ppxffXVV9I+DhQ2NGrUKF1yg1Pw5JNPCiY4TaVLl5YucMJmzJghcxBpx8qVK03Xrl1lXb388stmwoQJ6XaNHz9e+oosOJnIo2wJEnrwLlmypMwb/zJe1hOOH9IZyvfffy+OBmvnlVdekXGCJbtUjBeHBoxYi+xgYSOORjxzxppnHGCFE4BTRF+sb4g8a53diBtuuMH06tVL1jBt04cWRSBVEFBCnyozqeNQBBSBpECACCJkFcIDuY8skB2ICeQPgjNy5EiJxkKe77//fiF5RF+J1EN0IY9dunSRqDTkjbYhuMgYiMRDbPkdkXEi+UgZiOYSpYR8QaDvu+8+sQfyDUEn0ksdyCqOBMT11FNPFVKE3h3yBMGGUHMQ0UY72S0oWLCgkH3IMPX5XaVKlcy4cePM+vXrZSciGqFfsWKF2EakGweBCCv169evLwQdUo1GetiwYbIrMWbMGNlVwH5LqCHP4Avxz4jQZ4QfWEOYcQwKFy4sRJsClozBSm6wC/yJmvMvBJrfjR49WggpOyC0xTxB2q1DYseOA4aDs3r1anGEsJeC5Apnhbni92BAXZyfUqVKyX/zmdXQQ1CZT3YAaAunCOfJEvpYdsSS3ECowT6y4FjhTEUSetYLfTJvRMRxOpgfdo9wFHAccARxgKwzyxpn/ijIbHBoWHPMP9H+fv36ifOALZbQZzZnvAc4wOCAwxWL0LP+sAnSP2TIkKT4zlAjFYF4EFBCHw9K+owioAgoAiEhYKORRE2tVIboJBFDSAbRRuQ0lqAhNalbt64QHyQDEHoij5A4PuMw4WOPPWY4pAihh0BbQg8xhCjy/3/88YcQKsgSUgjIGcSbqL4l9EHSGST0EHIcDOyC2EOusQHSZUk78CDVQWLC75B7WB05Ed+33347Q0JPJBUCDJkPZiiBxGMzY2E3AYLNDgBEGTILoefwpC1EmIliZ0ToM8LvjjvuEGkPhQguBJNIciSh57wDjsNbb70lTgYEu3jx4kLmwc6OHcx4hug1kp1g4TkK0h5ILxF7u6uCQ8HvibyvXbs2vRqEH5wsoSdyjkNgD8jaObCEPpYdsQg9pBpZTGSBnOM4RRJ6tPDYbtcza5AdHaRKfH7vvfeKI4RjCqlnBwNCP3bsWPk9xJs5x1lFPoSDwHzjJODEWkKf0ZzhbLCDYjX39qwDGLCWcBaxhwi9lbwpoQ/pC02b8QYBJfTeTIUaoggoArkBAXTG6LwhnMgQkBxAwin16tUTyQASCkgOz1n9MZ8TkY6X0EO0IUVE4Ym4zp8/3xw6dEgIE3IHiCPRymCEnggvjgTFEnoyhhAhRwdOVhIINm0S5YSA4oDYNIFEUS0pCx7IjIfQYyuEjHFDgHEyILNEwiGvSIUoVr9OCkJkROivibTi7PAZhA2yeKKEvnv37iJ5QjqEZASHCpkIUWQ02kThOY/ArgXkmcgyuxtgCXnG8QiOPSNCj47eatlxGKw8BecMO9ihwZlg1wGnip0bxoYjZgk9OzI4idixbt06cXQo8RB67EVatWrVqvRXjx0HdgIiiz2wGknoGS+7MYUKFZJ1hTOAnh1n78UXX5R1hwOIc4RtrEsIPTs7YMb84QTxDE4Bcw8OOG84uvEQeqRGOJI2q5Dd1eJ8AP2w/nBoIfRIvviX9RUcd2747tExpjYCSuhTe351dIqAIuAhApDD/v37S0SdApmB5EAYIaJILSDuthAd5SAhumoOrUaLVkL+0NXbCL09kEp7Vs4AQbYyDogUz/B7DjBCXiMJvSXFkCBIJP9vDz0SbYbcEhGHdFIgiERKIWRBUmvlJbEkNzZtJUSa6LPtB0Jotf3IOJBz0B/PUYiSE1Emwk2xOvVYKRXjxc/KVhgrOxiMj2gxRBWSSYGIW/mUnUMi0JDa4NiJnuOIRYvQsxuA82adFDtGsteAIU4cshMkNBRkLTgurBVL6H/66SfZkYDMgj+FdYUjBDm3EfpIO8CR3QccNZ61BQJs12Xw1eGMBJp7W4IY4zwQVbeyIZ4FJ6Lj7DzZOUKyhSQL3DgIHFzjtIssjAg+Ui4KxDueOcNJsNIn6uH4IAuza59dJQg97x0Owvnnny/SNC2KQCohoIQ+lWZTx6IIKAJJhQDRTMgakUJkMMGCZhrCDBG0Mox4B4ecAokBEX/INJIUm9EDknPJJZeIFj4rWT6oR5SYKHCQ2GETEVBIL9KQrLQZbTxovm0/OCLxFJ6H2IaZW5y52bZtmzhDwUIkGsfJyoKYJwgizlC89sYzpuAzrAMcsGgXIrHjgiSLA7DkXoeoQ2Dtrk9GfUHewTsM3GiHnSAOBtvLoMAQZ5F1gaPD7gISG3ZRkHfFKlaag5wnKPPJCm6sV/oi6m/XJOPFaSL6T9taFIFUQkAJfSrNpo5FEVAEFAFjRDMPoYfoaUltBHDY0IPbHRlGG5lRJ1EIQKqR09gdHOwIpmuNZReHmzkbwo4OGvywCrshOECcwcjKDbRh9a/tKALZiYAS+uxEV9tWBBQBRSABCHBAEtkDGm8tqY8AuwbIsNgt4NAsaRp9KUTukQOxi0K0HTlTPLs4OCocMrYpRMMYDxp++s5odyCMfrQNRSARCCihTwTq2qcioAgoAoqAIqAIKAKKgCIQEgJK6EMCUptRBBQBRUARUAQUAUVAEVAEEoGAEvpEoK59KgKKgCKgCCgCioAioAgoAiEh8P+oR1sO2PaK2AAAAABJRU5ErkJggg==", - "text/plain": [ - "\n", - "\n", - "If you see this message, it means the renderer has not been properly enabled\n", - "for the frontend that you are using. For more information, see\n", - "https://altair-viz.github.io/user_guide/troubleshooting.html\n" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.match_weights_chart()" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "6a3dddb6-f9f2-48a4-9511-76fa0c0f79d3", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.vegalite.v4+json": { - "$schema": "https://vega.github.io/schema/vega-lite/v4.json", - "config": { - "header": { - "title": null - }, - "title": { - "anchor": "middle", - "offset": 10 - }, - "view": { - "height": 300, - "width": 400 - } - }, - "data": { - "values": [ - { - "bayes_factor": 4948061.444887786, - "bayes_factor_description": "If comparison level is `exact match` then comparison is 4,948,061.44 times more likely to be a match", - "comparison_name": "comp_num_clean", - "comparison_sort_order": 0, - "comparison_vector_value": 2, - "has_tf_adjustments": true, - "is_null_level": false, - "label_for_charts": "Exact match", - "log2_bayes_factor": 22.238431985096113, - "m_probability": 1, - "m_probability_description": "Amongst matching record comparisons, 100.00% of records are in the exact match comparison level", - "max_comparison_vector_value": 2, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "\"comp_num_clean_l\" = \"comp_num_clean_r\"", - "tf_adjustment_column": "comp_num_clean", - "tf_adjustment_weight": 1, - "u_probability": 2.020993496419037e-7, - "u_probability_description": "Amongst non-matching record comparisons, 0.00% of records are in the exact match comparison level" - }, - { - "bayes_factor": 1.3654800037318084, - "bayes_factor_description": "If comparison level is `jaro_winkler_similarity >= 0.75` then comparison is 1.37 times more likely to be a match", - "comparison_name": "comp_num_clean", - "comparison_sort_order": 0, - "comparison_vector_value": 1, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Jaro_winkler_similarity >= 0.75", - "log2_bayes_factor": 0.4494081872425724, - "m_probability": 0.025000000000000022, - "m_probability_description": "Amongst matching record comparisons, 2.50% of records are in the jaro_winkler_similarity >= 0.75 comparison level", - "max_comparison_vector_value": 2, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "jaro_winkler_similarity(\"comp_num_clean_l\", \"comp_num_clean_r\") >= 0.75", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.01830858008295684, - "u_probability_description": "Amongst non-matching record comparisons, 1.83% of records are in the jaro_winkler_similarity >= 0.75 comparison level" - }, - { - "bayes_factor": 0.02546625581726258, - "bayes_factor_description": "If comparison level is `all other comparisons` then comparison is 39.27 times less likely to be a match", - "comparison_name": "comp_num_clean", - "comparison_sort_order": 0, - "comparison_vector_value": 0, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "All other comparisons", - "log2_bayes_factor": -5.295269327176444, - "m_probability": 0.025000000000000022, - "m_probability_description": "Amongst matching record comparisons, 2.50% of records are in the all other comparisons comparison level", - "max_comparison_vector_value": 2, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "ELSE", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.9816912301278894, - "u_probability_description": "Amongst non-matching record comparisons, 98.17% of records are in the all other comparisons comparison level" - }, - { - "bayes_factor": 9073085.184755592, - "bayes_factor_description": "If comparison level is `exact match` then comparison is 9,073,085.18 times more likely to be a match", - "comparison_name": "name_unusual_tokens", - "comparison_sort_order": 1, - "comparison_vector_value": 3, - "has_tf_adjustments": true, - "is_null_level": false, - "label_for_charts": "Exact match", - "log2_bayes_factor": 23.11316177321052, - "m_probability": 0.9126429163214581, - "m_probability_description": "Amongst matching record comparisons, 91.26% of records are in the exact match comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "\"name_unusual_tokens_l\" = \"name_unusual_tokens_r\"", - "tf_adjustment_column": "name_unusual_tokens", - "tf_adjustment_weight": 1, - "u_probability": 1.005879364887769e-7, - "u_probability_description": "Amongst non-matching record comparisons, 0.00% of records are in the exact match comparison level" - }, - { - "bayes_factor": 2.525226260074343, - "bayes_factor_description": "If comparison level is `jaro_winkler_similarity >= 0.8` then comparison is 2.53 times more likely to be a match", - "comparison_name": "name_unusual_tokens", - "comparison_sort_order": 1, - "comparison_vector_value": 2, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Jaro_winkler_similarity >= 0.8", - "log2_bayes_factor": 1.3364126590180854, - "m_probability": 0.0018668876001104668, - "m_probability_description": "Amongst matching record comparisons, 0.19% of records are in the jaro_winkler_similarity >= 0.8 comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "jaro_winkler_similarity(\"name_unusual_tokens_l\", \"name_unusual_tokens_r\") >= 0.8", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.0007392951790607094, - "u_probability_description": "Amongst non-matching record comparisons, 0.07% of records are in the jaro_winkler_similarity >= 0.8 comparison level" - }, - { - "bayes_factor": 0.12707702474714255, - "bayes_factor_description": "If comparison level is `jaro_winkler_similarity >= 0.6` then comparison is 7.87 times less likely to be a match", - "comparison_name": "name_unusual_tokens", - "comparison_sort_order": 1, - "comparison_vector_value": 1, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Jaro_winkler_similarity >= 0.6", - "log2_bayes_factor": -2.976224877129608, - "m_probability": 0.011411212372272854, - "m_probability_description": "Amongst matching record comparisons, 1.14% of records are in the jaro_winkler_similarity >= 0.6 comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "jaro_winkler_similarity(\"name_unusual_tokens_l\", \"name_unusual_tokens_r\") >= 0.6", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.08979760420877689, - "u_probability_description": "Amongst non-matching record comparisons, 8.98% of records are in the jaro_winkler_similarity >= 0.6 comparison level" - }, - { - "bayes_factor": 0.08145354849808316, - "bayes_factor_description": "If comparison level is `all other comparisons` then comparison is 12.28 times less likely to be a match", - "comparison_name": "name_unusual_tokens", - "comparison_sort_order": 1, - "comparison_vector_value": 0, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "All other comparisons", - "log2_bayes_factor": -3.617878639101611, - "m_probability": 0.07407898370615852, - "m_probability_description": "Amongst matching record comparisons, 7.41% of records are in the all other comparisons comparison level", - "max_comparison_vector_value": 3, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "ELSE", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 0.9094629401922473, - "u_probability_description": "Amongst non-matching record comparisons, 90.95% of records are in the all other comparisons comparison level" - }, - { - "bayes_factor": 1, - "bayes_factor_description": "If comparison level is `exact match` then comparison is 1.00 times more likely to be a match", - "comparison_name": "postcode_area", - "comparison_sort_order": 2, - "comparison_vector_value": 1, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "Exact match", - "log2_bayes_factor": 0, - "m_probability": 1, - "m_probability_description": "Amongst matching record comparisons, 100.00% of records are in the exact match comparison level", - "max_comparison_vector_value": 1, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "\n regexp_extract(\"postcode_area_l\", '2')\n = \n regexp_extract(\"postcode_area_r\", '2')\n ", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 1, - "u_probability_description": "Amongst non-matching record comparisons, 100.00% of records are in the exact match comparison level" - }, - { - "bayes_factor": 0.03125, - "bayes_factor_description": "If comparison level is `all other comparisons` then comparison is 32.00 times less likely to be a match", - "comparison_name": "postcode_area", - "comparison_sort_order": 2, - "comparison_vector_value": 0, - "has_tf_adjustments": false, - "is_null_level": false, - "label_for_charts": "All other comparisons", - "log2_bayes_factor": -5, - "m_probability": 0.050000000000000044, - "m_probability_description": "Amongst matching record comparisons, 5.00% of records are in the all other comparisons comparison level", - "max_comparison_vector_value": 1, - "probability_two_random_records_match": 1.8857934522042473e-7, - "sql_condition": "ELSE", - "tf_adjustment_column": null, - "tf_adjustment_weight": 1, - "u_probability": 1.6000000000000014, - "u_probability_description": "Amongst non-matching record comparisons, 160.00% of records are in the all other comparisons comparison level" - } - ] - }, - "hconcat": [ - { - "encoding": { - "color": { - "value": "green" - }, - "row": { - "field": "comparison_name", - "header": { - "labelAlign": "left", - "labelAnchor": "middle", - "labelAngle": 0 - }, - "sort": { - "field": "comparison_sort_order" - }, - "type": "nominal" - }, - "tooltip": [ - { - "field": "m_probability_description", - "title": "m probability description", - "type": "nominal" - }, - { - "field": "comparison_name", - "title": "Comparison column name", - "type": "nominal" - }, - { - "field": "label_for_charts", - "title": "Label", - "type": "ordinal" - }, - { - "field": "sql_condition", - "title": "SQL condition", - "type": "nominal" - }, - { - "field": "m_probability", - "format": ".4p", - "title": "m probability", - "type": "quantitative" - }, - { - "field": "u_probability", - "format": ".4p", - "title": "u probability", - "type": "quantitative" - }, - { - "field": "bayes_factor", - "format": ",.4f", - "title": "Bayes factor = m/u", - "type": "quantitative" - }, - { - "field": "log2_bayes_factor", - "format": ",.4f", - "title": "Match weight = log2(m/u)", - "type": "quantitative" - } - ], - "x": { - "axis": { - "title": "Proportion of record comparisons" - }, - "field": "m_probability", - "type": "quantitative" - }, - "y": { - "axis": { - "title": null - }, - "field": "label_for_charts", - "sort": { - "field": "comparison_vector_value", - "order": "descending" - }, - "type": "nominal" - } - }, - "height": { - "step": 12 - }, - "mark": "bar", - "resolve": { - "scale": { - "y": "independent" - } - }, - "title": { - "fontSize": 12, - "fontWeight": "bold", - "text": "Amongst matching record comparisons:" - }, - "transform": [ - { - "filter": "(datum.bayes_factor != 'no-op filter due to vega lite issue 4680')" - } - ], - "width": 150 - }, - { - "encoding": { - "color": { - "value": "red" - }, - "row": { - "field": "comparison_name", - "header": { - "labels": false - }, - "sort": { - "field": "comparison_sort_order" - }, - "type": "nominal" - }, - "tooltip": [ - { - "field": "u_probability_description", - "title": "u probability description", - "type": "nominal" - }, - { - "field": "comparison_name", - "title": "Comparison column name", - "type": "nominal" - }, - { - "field": "label_for_charts", - "title": "Label", - "type": "ordinal" - }, - { - "field": "sql_condition", - "title": "SQL condition", - "type": "nominal" - }, - { - "field": "m_probability", - "format": ".4p", - "title": "m probability", - "type": "quantitative" - }, - { - "field": "u_probability", - "format": ".4p", - "title": "u probability", - "type": "quantitative" - }, - { - "field": "bayes_factor", - "format": ",.4f", - "title": "Bayes factor = m/u", - "type": "quantitative" - }, - { - "field": "log2_bayes_factor", - "format": ",.4f", - "title": "Match weight = log2(m/u)", - "type": "quantitative" - } - ], - "x": { - "axis": { - "title": "Proportion of record comparisons" - }, - "field": "u_probability", - "type": "quantitative" - }, - "y": { - "axis": { - "title": null - }, - "field": "label_for_charts", - "sort": { - "field": "comparison_vector_value", - "order": "descending" - }, - "type": "nominal" - } - }, - "height": { - "step": 12 - }, - "mark": "bar", - "resolve": { - "scale": { - "y": "independent" - } - }, - "title": { - "fontSize": 12, - "fontWeight": "bold", - "text": "Amongst non-matching record comparisons:" - }, - "transform": [ - { - "filter": "(datum.bayes_factor != 'no-op filter2 due to vega lite issue 4680')" - } - ], - "width": 150 - } - ], - "title": { - "subtitle": "(m and u probabilities)", - "text": "Proportion of record comparisons in each comparison level by match status" - } - }, - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAvsAAAEACAYAAAAtPLkJAAAAAXNSR0IArs4c6QAAIABJREFUeF7snQeUFMX2xgvBAIgRAZWnmAVM/FHMIoIiiGJ6GMGMWVAEswIKBlQMKKKYEUVUzGAOiBmVp2JCCWIAxYRiQOV/fve9Wpuhe6Z7drZnduerc/bs7kx1ha+qb3331q1btVq3br3IKQkBISAEhIAQEAJCQAgIASFQ4xCoJbJf48ZUHRICQkAICAEhIASEgBAQAoaAyL4mghAQAkJACAgBISAEhIAQqKEIiOzX0IFVt4SAEBACQkAICAEhIASEgMi+5oAQEAJCQAgIASEgBISAEKihCIjs19CBLUa3llpqqcWq/fvvv4vRjMR1rrjiim7ZZZd1P/zwg/vjjz8SP1+IBxo1amTFzJ07txDFqYw8ECiFeZDZbN6phg0buoULF7rvv/8+j17pkcogUMz3krGvahnqZXZV11OZMdCzQkAIVB4Bkf3KY6gSnHMjRoxwrVu3XgwLyPOLL77oBg8e7P7888+Swen88893ixYtchdeeKG1afz48W611VZzV1xxhbv77rtTbSdk4sEHH3TLLLOM++KLL1zXrl1TrT+fyp599lm3wgoruLPPPts9+eST+RRRks8Ucx5EAbLLLru4yy67zP3yyy+ubdu2JYEb7/Nuu+3mnnvuOde3b9+SaFNVNeLNN9+0onfaaSe3YMGCqqpmiXKRTZ06dXKvvfaaO/HEE6ukXvp05ZVXup9++skxz4KJ95v3HCWgTZs2VVJ/PoU2a9bMHX744W769Onu9ttvz1nEG2+84WrVquW23HLLnHlzZchcN7LlT9rOXHXreyFQWQRE9iuLoJ43BG688Ub3f//3f2YZ//rrr139+vXdqquuat+xaPTr169kkPILuF8AIC8bbbSRkf2XX3451XaykB9xxBHu119/dTfffLO77bbbUq0/n8pqKtkv5jyIGoeWLVu6gQMHus8//9z17t07n+Eq+DMoH5BDFPnTTjut4OWXUoEo/yjiBx10UKq7fhdddJHbfffd3euvv+5OOOGEKoEE5RGZF0b22eV65plnSo7sd+7c2d6Hb7/91vDJlTJlfa782b5PUlbSdlamXXpWCMRBQGQ/DkrKkxMBT/anTJnijjrqKMuP1Qjr0bx588y6j5B+//333corr+yaNm3qdtxxR7fGGmu4q6++2q211lqOLWWsZ9ddd5279957XceOHc16/PHHH5slee2117bvL7/8cvf4449bHdttt53r37+/lYm1nkUAxeK9994zS2hmnew2rL766vbsd9995w4++GB36aWXuvXXX98NGjTILNXZysTKRf6ZM2e65ZZbzmHBoUysTGG7AhAFyBHP8fdvv/1myg9thuQfffTRbumll7adDxaTk046aTGsX3jhBTd//nz37rvvWn9OOeUUN2PGDHfVVVdZm0lz5sxxJ598sps1a5b9Dzk48MADrX3ffPONowzakAuvM844w7FIkR9FCBJAOfvtt5+1q169erb7wG4EZYdZ9ukj48PzjCckdfTo0W7cuHFWf8+ePd2hhx7q6tat6/766y/3ySefWNvBcNSoUW7NNdc0ayZjUKdOHdt1wYWlS5curnbt2vY/Y0r9TzzxhPvxxx8ND+rDCvnoo4+6Sy65xOpiHvAceX/++Webe3369DHSFobr8ccfv9g8YB7vvffernHjxjYGr7zyijv33HOt7Gzzlu+ffvpp6x91brPNNja+ftz5PlvZwQlAvyBkX375pRFOFMJ1113X2rLtttua+9mHH35o84I+ZiYUcIgjrkC0gfagYIIBZOn000+3d+v333+3+XPeeee5zz77zOZq1Dh6sg/uq6yyilt++eVt/MD7nXfeWaIN//rXv0wW8Js28D7zP+9otveD8UwyJ+LIi6RzgnedebjrrrsaZvnOiaRyI5PsR40jfWYM2fnh3SUx/5kbTz31lMldykIOgzXv9JAhQ+zdiUv233rrLbf55pvb2PH+USa4MK95V70h5+GHH3YoCciFSZMmVcwDFEPmFfOFucY7jksa7TrrrLNsV5X5gxxgbqy33no295DTyHSMR+wc8w4MGzbM5BCfIzuQ3/TjnHPOcSuttJK9A//5z3/sPeVvT9AnTpxomPDcSy+9FLkjFTW+jzzyyBLrBn2N207WB3Dh3ULukygTPJCHyMm4MiHnYqwMQiAEAZF9TYuCIJBJ9hGEWKlZ4CGI/I1A9gkihPDFFYBdABZ2iDCC3BO1Jk2aVFgOyQ95ZEsWgQ0JhQTef//99hkLMd+zMFMWbgbt2rVbok6E7QYbbGB1QG4QsJBR78bDIpWtTBZtFjsS7aBd1Elb6E9mCro3QUzBhYRSAYGHYNJn2v/888/bghBMfrHyn5F/wIABRripkx+e5zfKE7h4CzAEtUGDBvYo7WCBz9Y3SDQk2yfwYeva7zYEx4A8YWTfzwPGAAIJqScdcMABbuutt64YT0gHbWPsOKcAUfE7BuSHWICrT8H/URg++ugjI+yZ48D/EAK+v/baa22MvvrqK8dcYn488MADNn5huNJ/Pw/4HuXNk1N2flA2IBGMXbZ5Sz+C5TOvUThIkBPaFFX2hAkTFhv/TDcextDvmAXLveOOO9w111yz2LMowJRHu5kLjAWY8g7QDnaxwAQFmXeQ7yEd++yzT8VOXdg4HnvssRVuH7QBhYNxDLMQUz7z2s9x2sIPz+2www6Luf9lvh/MryRzYuONN84qL3hnks6J4cOHG6aQZUhqvnMC3JPIjSDZR25GjSPGAizw4H/MMce4t99+2xRBDAgoAchBfphz4AshJh133HE25rks+/794l32cxhyD5aUz/sB6ef9uOuuu0z2IkOCbpv777+/O/PMM61eZAjj7xP5/bkB5M2+++7rUBpQpjHsME9QKMl32GGHmWHI/49iCaZjx461/qPwMM/4G0USd5/ge0h5fn25+OKLTRYGE+tC1PhSd+a6AYGP207eAXbBMHyhoJFeffVVex+TyoTFGq1/hEBMBET2YwKlbNkR8CTPLw4IXJ9YtFlYPNnHWnf99de7Dh06mJWWhQhijiUG0tKiRQuznCOMEZAsHAhIysQaAilhQWDh8jsHfI/gRKiyKLEAYCXKrBNSnbkdG/TV3mqrrbKWyWLKAsPiwyKG1QlLEQsWFnW23X2iPQh0Eu2AqEFU+WHRg/zSf4hu1Ha9byu/Id70GeWEhDWXBeyGG26wvmMt7d69uxFWdj7I36tXL7fHHnuYpQ1FIxtekBkWajDC+oc1HuLMYs4OAecJUHbYVSCFkX3vI0u91Mm8YPeDctgdYKH2vt7s5kC+SUceeaSVi6XrlltusT7hJsJYPvTQQ3a+gudQECBs9913XwXZxyKIhZF5teGGG5oFEOK711572aKPRRFiQt1YKcE/E1cUjuA8AMMePXpUWDP5nnmH1RALabZ5y06IL5/dInD0BIbfWDGjymZHIJiiyD5EnfFnjOknfeZ3MKH0YTWEBDEeEBPa7fGGKEL0sQDzN9Zb5hNzJNs4QtJpF/3gHd5+++2NhIX5d/v2M9/bt29veDLmzFcwYCcm2/vhyX6cOYHCm01egFnSORH02QejfOcEynZcuQEeQbKPHIsaR3ZneLfY5UMJBVveHa9MeeKPwo6cuuCCC8wAM3nyZJMjcci+l2sjR450W2yxhckq5h7vN8SducU4Ix+CO7t+LnqyT5t23nlne9d5B5kLKCLIJ3bc/NxjZwJSzlqAPAJzEnUw94JuPJB2dl1Q6Pfcc8+KXVlINTthfvx4hnfPW+i90h98X+hTtvcyc91I0k4IfTayzzjGlQniIkIgHwRE9vNBTc8sgYAn+yzqkHbvMoBFikUIayGEN3jQEAKGJTpo7fBkmHwskAjIIJFh0d9ss81MiGPhXGeddRY7E4C1BT9nvodcZ9ZJw7ORfSxL2cr0iwRWMsgLiS1iiCBthaD6hNLCgkWCJEOGIFwsOiRIFRavOGSfRRJcWVDZEg9LHPSlLMioVy6C+VCAsvUNks/iCj5Y/kj0F1LulQc+gzRRRybZh0R4d52wA3HekhV8DgICoYd0YJmE7Pu2eyu2J8y0AYsiiuKYMWOM7AcJJiQCYsLcYaEHe2/J9DigrFGPnwMeV74Pkn3mLcTAWzP5HjKCqxPkNtu8xa3Al+/H/dZbb3Wbbrqp1YFSFlU25CyYosg+yg9KkO8zrlyQpmBC4Qnu1AS/AyfIDdbwYKKPhxxySNZx9G48tBVFy89plHaU5WBiJ4rds+A7nuT9QBmIOycgk9nkBYQv6ZwIkn12MPKdE8xhyH4cuQE+QbKPLI0aR95Tdq1wC0T5or0oYMhcdqGCxofguCBTeedykf3g+4V1m/H2Ozg33XSTa9Wqlck/5hEKcqYMpE5P9v0cwDADHh4LP8c5t8TuJDIROZwZ3Q15y7sXJPso+NQddVjcj583Yvj1gXFkhzSY2AnL9l5mrhtJ2pmL7GNMiSsTQoW/PhQCORAQ2dcUKQgCYT77wYI92Q8u+t26dTN/T2/lJr8nKFhq2FJl8WDBwaIDGWURw0ILCYD4Qab89i/Ps6WNBRtiCLGDOGYSjWxkn8UmW5mQUxZt/P2xSpGiyD67Gd7VBKsg284eB0+M4lr2PXnGdxeseR7fawgOZIu6sMZiHYec436AfzcYQwJpNwtztr5BjFkUgwsnFsFNNtnEffDBB7ZrAMnHoscuSybZZ3GGAGLtw1eeXQ8sz/jg0mYIDJZ5dmywyOHrzU4MCesgBDYusfNkn2chrSggEBeINnMH4tO8eXNzS6GdWLmJFpVJ9oNKSZDsM6ewlmJhZPse3CD+kCQIRrZ5i4Uxc44FyT6KRFTZmdb5KLIPueMdyEb26TekCXLmXbuol7HDHxo3B94p+sI5AL9ThAKUbRxROIMHdLORfeYcpNBbmpkj/I9ln3ofe+wxG/+o98Nb9uMogJ7sR8kL5mHSOREk+/Ql3znBblRcuZFJ9mfPnh05juBHv70F37t2Me7MYcg+mGM4YfcLQsv5Db5jbuci+7QFGYLrF/7pzA12BXDlQlYgr5lD/jwScjozJSH7KJp+t4+20WbOb5HCyL6f495tBwMLCjnKL/Mr8z3MRvZRJLK9l8GygruScdrJe40sxJ2OXWwf7Yh+oQigKMWVCQVZsFVI2SEgsl92Q141Hc6H7ENQIecs/JAzBDS+nyTIKpYeH+0DAgeJ9HGvsc5CvlhESSxe+Kn6w7dYdiGp2cg+7gsQUFxCvK82Lh/ZysQynmTRxvqKewwLIgcp2XWgH35xSkr2wYrdAxZXiCznIXCz8YsGizG7I7g0sDhhaeUZtpzJm61vWO0yyb5XyCgfbMDf+72GufF4NyzGE4KAckJCMUI5gSiiqHBQFIJJWYwzJB0XljhkH7cFdim8IoU1GhLuDyyjCIAz40+IPnCAcEByvZtBJgmgjUGyTz9Rbiibz7HK4yIEBlg4s81blKxsZB/XmaiyaWcwVYbs45ZDW0iQPpRkMIIQosRhFaZ/99xzj+1UMBbelSLbOGLRjEv2qdvvBHGgEus4RMlbh3O9H3HJPnOCdyybvEChTDongmQfmZPvnOBdTSI3gpZ9iHrUOLI7iHIzdOhQs4qT/BkY/mZsGXPGFeIMGUdOUt7UqVNjkX1kCfPeK4pYxLFCk7yhg795JyDalSH7EF92B5ER7OIiN9gxJOGugzzzoZw5I8Bupt9NpH+QfQwKPmRpErLP/Mj2XvqyWDdQ3nGDittODBx+RxdZhXzxZ5LoM2FW48qEqlnBVWpNR0Bkv6aPcEr98wdRsV5jpctM3v0k08oOOcXKywLkkw/p57c+WcT9wT7yQM59xBWskQhJf0YA4Yv/MNbXqDr9AkhZ/jAiZB/LFd9lK5PFJ8mijdUTYgXR8glhD2EkIkVSsk8ZHATGlcIffqXPLD64t2AtZ0fEKz3kx33GR/nJ1je/q5K5JR50B8GlgMWfusPIPi5W+AyjjJBoG9GVwIDFje/w+/UJ4omSgcU9LrHL9NnHbQfFkeT9m1EyIIB+QUVZBBNPZrORfeYB1kXmGePnk4/Gwi5HtnlL/szyvfsZigM7GNnKDr47nux7i6B3bcJKyO5VNss+5aD48T56lwhvyYf8U5Y/wA0ZhVQxXkQgwkUuahwzQ2/iRsIuQ1RMdt7jU089teIdZf6g+HF+Itf7kWROeJ/9KHnBLk7SOeHPLkCqmfv5zomkciMzzn7UOLJjR2LH4s4777S/kcXsnpCQO1izg/IAyzzuVRD/XHH2mQ8o417B9++Xv4TL37lAXRzC//TTT5eQ/ZxhIeqOl/0eC+/Gw7vEO+ffTf+uUBDzHqMO6wNthdxjFEC+eLdDv6PnK8YlhjmHfM18D/1OJcTbn1/xz1FmtvHNXDdQepB3cdvp3Z7IT19RfFnXMExxRi2uTFgCYH0gBGIgILIfAyRlqVoEICIsVhAMCIAPIejJPkSEBQNLJYsN5C6YENKQOxZjlI04F3hhWWEBgQSG3R6ZT5nZUIJI4Q6CNQ33lsomjxmWcBbvzJt/sZ5iiYNEZ968mk/fwIvQp9SV67ZN2oaFDXcqyFJm23AXgvCj9DCe+STvIuUJJjtC9DN4AzHtYF6wi4JSkU+CjLKg01bKCfY9at7GrSdb2XHLiJOP8caFCeLE/Av2gXFipyHs5uZc4xinbp8HwsaYQxyJQpU5hwrxfsSRF6U+J7Jhmm0cc40F+LKTxs4Wu25JEy5QzJ/MeeL9+MPOZCStI5ifdwPDRZh8YAxRXlDufHtwT2J9YDfRhyDOt/5s72XmupG0nchrDEvsOIbJ0bRkQr7Y6Lnqi4DIfvUduxrf8uDijQVeSQh4BDLJvpARApIX6c4BbyWnVnYVscgrCQEhUJoIiOyX5rioVf8LtYZ/MOEOidyjJAQ8Alg58RtmO1yKoOYFCODyJHmR3lzA75/LETmTkRnZJr1WqCYhIATiICCyHwcl5RECQkAICAEhIASEgBAQAtUQAZH9ajhoarIQEAJCQAgIASEgBISAEIiDgMh+HJSURwgIgbJAgEOBHKLjQG6xEi5KHDLPdRC6KtvHIUgiGWUers5VJwdx8z0MnavsuN9zEJ4wn0pCQAgIASHwXwRE9jUThIAQEAL/Q4CY3Vz0RfjWYiUuSiJiDf7nxUqckeEiIEJOxk3+xujgrcT+WcLjEmKVy8kIxUp0GD4jNjthIIkS1KdPH7ukiXCP/nOiOSVNjCGX73FXh5IQEAJCQAiI7GsOCAEhIAQMAS604lIm4o8X06peE8k+IQW5EIlLtIjcAtnnHgwwJ94/Oyp8xw2qWOX958RiT5oIt8odAbvvvrvddK0kBISAECh3BGTZL/cZoP4LASFgCEyYMMHikHMLaJcuXexyNeJhEx+eGP6EFuRyMlxVuKznhBNOWAK5jh07ut69e9udEcQzHzlypLv33nutPCzY3BnRqlUrixHO5VRc7sOlVNxATQxv4ob7G0AzLfuQYfL721T5m4t4uO02mLh8CPLMvQjEH+cuCW5/hfhyYRo3gEKEaSd3TXBbK/lxv7nqqqsct31i2d93330tBj8xzYmPf95551XcWszlVJBqLrMjtjnx1rlhmLZwtwMxz//66y+7ZZRL0Dp37mx4Uq8n+9zay8Vg4IK1n377i8sg697iD3Z9+/a1i9y4o4ILucARBYLLmLhPArcn6uY5EpdxcTlf//79NbuFgBAQAmWPgMh+2U8BASAEhAAIvPbaa3bDKjeR4mLSq1cvx22ckGOIKX7skGVSu3btXI8ePcz9JJheeeUV98UXX5hiwM2YXEbETgEWa8qDPN91111GvnFXadu2rRs7dqyFMITM4+bChWjcEppJ9rktFas/N06T+J+bfiH3wUR566yzjpXx/vvvGzmeOHGiO+eccyxMIvVyORq3UI8ZM8YuS4Ikd+rUyS4Q4yZUiD59RlGAkNMXFAPau91225miQpx1+oNiANGGYPMbxYBbnHfddVez4B911FFWrnfj8WT/0EMPNXcdlCYu1eN7ykSB4UZePqd8lCWs/bjncBswN4/SjqFDh1q5Q4YMMZcjsONzLt1DyeD2WJQLJSEgBIRAuSMgsl/uM0D9FwJCwDVs2NAs+1iT8RP3ZL9Dhw5moYdUkwcfdkj/66+/boQXIhpMWKBvv/12cwOC0HPzMz7s3AUA2d9tt93Mwn7kkUdaXVtvvbV79dVX3a233mo+5hzOnTRpkpHgypB9iG779u2tabQJyzq7DpD9Bx980Kz5tAFCDSGGVJPoO8oC7Yfs8xy7DcSwp78QcvrfuHFj22XATx+rPEoP5xwg+/jd85t8KFAQd6z/2cg+CpJ348F675WAbt26uR133NFtu+221qZtttnGFDIUksGDB7s111zTlBiUGRQUFDNcf1BkwJ38SkJACAiBckdAZL/cZ4D6LwSEQAWZ9WQ8eKAUeO655x4jm9zS6kkxlmUsyMEEAUVBgOjiWkJEG0/2PdklP+QfkspnuLlAXj/99FMrCos8bkK5yD7EHSt4mGX/999/d1jOSVi9zz//fCPNkGKUDhSKCy+80BQCLPU+4fqCIgPhDh7QRQmhPp6ZNm2a7S7gRsMuAemtt96qIPtY9HF7ImHtf++990yJyIfsc4YCN53MxGFedl3YkahXr55h/cILL5gLFgkl5ogjjrBzAkpCQAgIgXJHQGS/3GeA+i8EhIC52uCvjjUen3PIPv75WN492ccHHXecKLKPDzu+9+wCYN3GrQRruCf7wfI82ceqzo4CxJ+DqiSs4bgLhZF98mKV97sLKBthZL927dpm6Sbht46Fnj5C2PGd5zdWc6z+WM0hy94Sj8KC+xGHZL1lfMsttzRijSKDGw5uR/SH261x88GH3lv2g+5NKA4oRSg9+ZD9Y4891s4C7LnnntYXlA6UlvHjxxuRp36s+9TJ7oNXZFBuaGuSaEJ6DYSAEBACNRUBkf2aOrLqlxAQArERqF+/vlmGIazeZz8p2ecgKcQa6z9uKbiycEgW0glZDSP7kGjCROLnjlsPSgAW6TCffUg1hPzwww93kGDKjCL7zZo1M195Dt1ySBhXJMh7kOxz0BhLPwoEFnt2AmjD9ttvbwoBbjy4F+HXj6vOaqutZoSa9uKKxPe4BqF8cLAZpYM+c04AyzrWd3YHMnc2wnz2Z82aZe4+WPIpK9OXH6WJdvIbpQrF5ZFHHjElA1w5EI0b0Nlnn23t4xDziiuuaH1WEgJCQAiUOwIi++U+A9R/ISAEDAFcTiDD5557bl6WfYg4hBR/edLHH39sLihz5swx3/4osg8hxf2EyDa4CkHQcbM566yzFhsZIt5gHSctWLDADsziXx9m2cfyzvckysM/f8aMGdY/yuAgMQl3HNxucMehfvzwBw4caIduaRd94ofv8YMnUg/toC8kdjsol37iS8/nHPglsg7JKy254uzj5oMLES45KDIjRoyoOD8xatQot/HGG1t54MMODEoOShXKgXclgvj73QwUI9yH+vXrp9ktBISAECh7BET2y34KCAAhIARAAEs0sd8r6/rRsmVLu4EXv3VCWvKDD3y2tNJKK9lhV3zfs91Ayw4Eh2M58BqVUAB+/vlnc2khsg+kN1si9OdGG21kFnV2AIIJok/oUQ7gBttFO7ipFjcaCDg7CbNnzzZ3IBIYEMknaZx7zgGE3V5M+eySEEWIvvlEuNJNNtnEXK/8OQHKwMrPTsVHH32kyS0EhIAQKHsERPbLfgoIACEgBEAAYsvtufjbc/C1uiZP9nEHKsfEDgO+/T179izH7qvPQkAICIElEBDZ16QQAkJACPwPAXzosSDjrlJdEy47WOEzIwVV1/4kbTcuSP6cQtJnlV8ICAEhUBMRENmviaOqPgkBISAEhIAQEAJCQAgIAeecyL6mgRAQAkJACAgBISAEhIAQqKEIiOzX0IFVt4SAEBACQkAICAEhIASEgMi+5oAQEAJCQAgIASEgBISAEKihCIjs19CBVbeEgBAQAkJACAgBISAEhIDIvuaAEBACQkAICAEhIASEgBCooQiI7NfQgVW3hIAQEAJCQAgIASEgBISAyL7mgBAQAkJACAgBISAEhIAQqKEIiOzX0IFVt4SAEBACQkAICAEhIASEgMi+5kDZIrDMMsu4evXquZ9//tn9+eefZYtDqXd8xRVXdH/99ZeNk5IQEAJCoBQQqF+/vlt66aXdDz/8UArNURtCENAa/w8oIvsl/Iq88sorJky+/PJLt9dee5VwS/9p2nLLLecuvPBCN2PGDHfddddVSZs333xz1717d/fss8+6xx9/PGsdzzzzjIMsbrfddu6PP/5YLO/111/v2rRp4/h9yy23VElbi1no66+/7hYtWuS23nrrYjajUnUjrF9++WX366+/uh133LFSZelhIVBsBDbYYAN39913WzMefvhhN3DgwGI3KVb9SWRurAKLmGm//fZz2267rRs+fLj79NNPI1vCWvbSSy+5H3/80bVv336JfKw/K6ywguvcubObO3duEXtU+KrPO+8817VrV/fAAw+4wYMHF76ClEqs6Wt8EhhF9pOglWLeXXbZxV122WUVNXbo0KFaWBBWWWUV9+STT7rvvvvO7bbbblWC2OGHH+5OOukk9+KLL7rTTjstax0vvPCCwwKzww47uN9++22xvEceeaQD1xtuuMHKqmkJsk9CoamuaamllnKjRo1yX3/9dc6xrq59VLvLB4GLL77Y7brrrtbhn376ySHnq0NKInNLvT8YdjbbbDPXv39/9+ijj0Y2l11f1oX58+e7du3aLZHvkksucWuttZbr2bNnjdt1PP/8883A+NBDD5nxrrqmmr7GJxkXkf0kaKWY9+abb3ZYUxYuXGjW/TvuuMNdc8011gJePsjr22+/7bbaaiuz3mKl6NKli1tvvfXMEnHssceadX355Ze37zbccEMHcfr+++9d37593ZQpU9y5555rFou3bKqeAAAgAElEQVTJkycbIeR7rBW86KSTTz7ZHXTQQfb5U0895dq2bevefPNNI10tW7Z0Z555ppX7+++/V3w+fvx4t9pqq1mb3n33XcfLFkz33XefWUM++ugjt+WWW5ogRWieccYZbqWVVnKffPKJPYNbDQsj1tzatWub5QQh/dlnn7lhw4a5unXrGjb333+/u/zyy81CBnGvVauW5eEz8PFk/9VXXzWscAcBjzvvvNOdffbZbvfdd3dXXHGFWY7POuss98EHHxiG7AbQxhNOOMH98ssvrkmTJu7GG2+0399++6376quvHFa6Qw891M2aNauii3vuuafhy67MFltsYWX069fPXXXVVfY/CWvSMccc4xYsWGD1sAOy/vrrW3/AjHYwhhtttJG7+uqr3aqrrmp40q/jjjvOlD6PI+PBQgSBYL4wN1BuqLd58+b2XBjZj8Ir23xJMu/GjRtnY/Thhx/azgK7KpdeeqntxKy88sq2m7LOOutY+z7//HObix9//PES/erUqZNZQMF83333jZx34JoNr9GjR7vGjRvbnNh+++1tTHnHsLLSZ6xXzEfmD+/NiSeeaAorc+H000+3eXnllVemKAFUVU1EAPIIiUS+1alTxx188ME270lxZSPvUr5zHVnOu4csog3smm2zzTYm21Cqmeu8cw0aNDAFG4MTMjpM5vrxYfftiSeeMJlInyDA/D1gwAD3zjvvVGoNCs4ByOepp55qMhp5SRtZl+gHcvzvv/92F110kZswYUKkjGENO+qoo6ydyF/WOmRCmAxGHjNerA3ffPONa9q0qZF61j/6hUzhM9qF8SnftTTYR2QsbaRfyHRwZ+269tpr3eqrr25y9LnnnnNY3kmsw0OGDHENGza0dYH2ggHpggsucB07dnSMD31g3aPNrDmPPPKIyTTWCtZddieo6//+7/9sHaK/jGMY2a/smpVr7ac9vAusd7wrtGPevHnulFNOsbWTuctawjqCAY/50KtXL7fssssu0S/w8Gs8fWGsjj76aFvH/TrK3M+GV675HcWFKBM+ALaPPfZYVsUyDVknsp8GygnrQCBDFiG5kMTevXsb2WXSkG677Ta3ySab2N+QV/KRIE4kCMv777/vDjvsMCMzkFIEIUKRictvLEqQFwgOCSHCd6R99tnHFIx77713ie942Q444ACbuLwwLFTNmjWzZ7GUQNR5OakPyzJCMJjYFmV7NNhWvic//Sbx8uG6RHn0D6IHUSMPFiaEEgoDbb7rrrtMcOPWQ5kIKi/cUBQ82adcBIOvm+/ov3fjQRj26dPH6vcLsW8L7aEeFljaQzvBmHTIIYcYsfbJW8D8/5Bx2sS2MeXSB9o3ffp09+9//7tifMjDOFL2f/7zH7MWIajISz/pI99BjBmfiRMnGpn2aeedd7YdFfIHseTvTLKPYIzCK9t8gSTEnXeQCD+ffHsYH4Qfys9OO+1kCxD9Ziyj+sWCx8Ln3Xii5h0LfDa8gvPAK9DMB5RmlD4UiTlz5ti8Z3fK7xqhQKKMsvhBgpSEQL4ItGrVyt10001m0X/vvffMtZB3lvlHiisbMXrkO9dRYo844giTlbyXfu2ABD7//PMmeyHByFzkOAov60+mzA26aHoLuF+DKBtZVdk1CJkQTJD0448/3j4KrnvB//07jWwPkzG4pWDA8b72kH2MWqyRmTLY7x779Yl+gRfEExIddOPBeJLvWhrso9918J/RD8gpMpJxYf0C23vuuce4gZ8HyEe/HiCnMWD06NHDisGwAakn0fdp06bZvPOJ71nfWHMy178wsu/XiKRrVtz5jcEQY5HH3fMCvxNG25HRzE0UPsbywQcfNCU2s1/MQb/GjxkzxhQlEhigMLKOM5YY7aLw4l31O//Mgcz5HbUm8TlrB2sIaxhGvGImkf1ioh9RN75yaO4QfDRRiD+TEss91hZP9pm8aPxMRF4IhCFWYKwxCCQIDCSHhEsNmiykiBfF+7IhoCCkWIxvvfVWt+mmm5rVG6IGqWcBwNpDPRBWT/bfeOMNaxMvDy8UxB8LCZ9nc+PxLzykmLZiVUdoQOrOOecc+02ZkFk0cl4YXmYsv9SHkkK/gm48Tz/9tFknUA6wHGMVwPpAPqzCCDos/QhIX3+3bt2sX5lkH4ECIaXvXjAeeOCBZl0BY9rEbwQS7Yki+xB7rA8Irddee80WCSwTlA/OXshgEUN4MD4IaMaWcWfHgp0T8iMs+I7zB34ejB071oQ7igH56C9CmPwohVgMaX8Y2Y/CC4GHxSdqvrB4QvZzzTuEpyf7EHuwYxwYI/rOgoXFfMSIEW6PPfawcWIho3yvxPh+MWeDPvtR8w6hnw0v2sw8YPFkHvBegB87V8yNNddc0xYAFg0WfpRNLP8QHt4NvquJ5zpKUPzV2CaxM+sJPu89ss/P+yDZzyUbkSf5znV2sNhRQ54iB1gPeC8h+7wfrD3IEN4RSDzvKu8M8iTKddKTfeQYchPrN3IYuY7syncN4nnWFRJWXmQoZN8bSrCWYgRC3tPGSZMmmXxkTcNwEyVjgm48rFXIlzAZzHrqSR79oJ/IMYwvjGMY2c9nLQ0j+xBZdohpF+2lTuYFu6QYaxgjyD7KineZZZ1ixxi8Mf6gIPh1b+jQobZLzs4x2HhSjFxk9/j22283uQeOzAUvhzPJvj9Dlc+axTgxn3LNb94T1k3WLtYSZDvrIWO7//77284KcwxjJOsL3gVvvfWWvROZ/QIjv8azAwCPYW2mX3AkuBB/o7xG4YWCzDwIm99woqg1iXccvsLc4V1jvhQziewXE/2Iur0VGVcOrMZsrWHZ5WVBU/Rkn7/5zBMrhFyLFi3M5QcBwOTHUoPmztYgicmPNdNrxzzDZ2zhQk7ZioRItm7d2q277rq2FQsBROlAsHiy7xcu3wUWLciztxpE+ex7ss3WMduSvNQ+L88jsHixePloOyQ2mMLIPmQSfOgjfQ0mb9H1Zx58vyHpCM1Mso/QwMIB+WZhnDlzpileYArxpBySJ8xRZB9rAELNC8ewoWabFXyD4+PzeWuzXzyCdYIT272QVa8AIsAhrswFlAq/AIeR/Si8mGfZ5ssaa6xhZD/XvAO74Jyk7RAbxgf86RN98JZ/vs8k+75fmQd0o+Yd44LSEIUXOENmmD8slJActr7ZieKdAT+/W8NiAA4i9yUoHKtxk/w7gZsYO0W4FZIglbhVxpWN3qUtn7nOvEZWIveQDX49gOCh6LL2YFwhQW4g0Fj2s/nse1kTVFxoG9Z3yHm+axCGAMom0WfWJcpD2YGAsWZtvPHGti6hXHi5xpqGLIiSMUGyjysLfQ6TwVH9Qj6wfoWR/XzW0jCyjyGEXSDILet4ZqINkF2UsOA88PnYVcco5ddEv35jSMKogxz2uyA847HzgSy8nM0k++xuR+GVa83CvQiyn2vtRzGBFzDmGNdIft1mTOkT64OX13wfJPvBfmUe0PXrtscJow4WfcqPwosxgJOEzW+Ur6g1yZ+ZKxWRJbJfKiPxv3ZAbryFIrNpnmzGJfu81AhBH5EF4c42JtZKLygRjF77D5J9SDakDY0a33mIKRZvyD6LE37iWENRCHh5EYwQYywLcSz72V54CDoWHQQLLzHk0i9CYWTfEzesSrQPIsy5ARYptOvgAd1cZB8tnQUlSPYh0YwJixfWZwStt/RHkX2/IDGGXvPHmob/K76GLKgsFggfysNCgGWBXRkE0OzZs80yj+8ruJP8DgECHqszZB8ceR7XHnZGsICxa4FrFYpgGNmPwmvQoEEmZKPmC76SSck+ChXYsTAxV5h3tI95jpWf8yK4CGSSfd+vINnHjSZq3mHly4YXC2dwHgTJPnONXaa1117bFGH+B1MWPvBTEgKVRQCCgsIblrCi4oseh+wjG9lJzXeuI0sbNWpkriH4nWOpRomH7ENo/vWvf9kP8si7OSDvcUHKZdkPHmT1ZB/Zle8ahBsPbSVxLgq5GZfse8UqTMZkWvaRT2EymHUx84Au/cpG9pOupaxZYWTfG1SQRygQEFjOtUFwWdtQTpBN5ENhYY1izHDdZR5xZo/1AcMPWOBCQhQijHHMHdbB4AFxH7UO904s4N7gGGbZj8Ir15oF7rnIPvOb/nrLPv1nV8MrIyg+9Jm1GAMOngi4wwbJfrBfQbKPlwL5kfMYeJjT7BahRKI8ROGFQShsHtAGjKNRaxJ4l1IS2S+l0XDOtG5IKoITckiCGCJESGiZuKvEJV3e+oA/Mlo9k92Xw4sTRfYhySNHjjTiB+FkQSBBphEofnsT5QHFAPcbBDIvhiewbA/6MHMe5jgLGi8lB59YbHCX4SVkW5LEy8VWM0KMHQGwwKqL5QJLAGcIIGneUpMZjScfss9L65UkBAmWMW9xikP2vU8fRByFCOGAAEOQYU0DW9xW2E5HEWNBwVoFgfXnL1joyBdFimmTd/eCDLBVTxvDyL6PtBCGV7b5kmTe+cUWvFi0OcREwoqCosKcZssYQQ25iEP2mWNR8w5Xm2x4Zc6DINlHyWGXibLBDkLhrUOQNN49FjIUKiUhkA8CWLfZOWPHD8WThGUfee/dQuLKRgwC+c51rOXIM1xk2Dlmd4sE2cfYAOlBjiOzsOYjQ1hzMHIEZS7vt09hUWs82cfyme8ahPEpmLzPfhzLvg9bHSZjWNcwXODSghELY0OYDMZdpBBkP9tamovsY6DArdUHz2B9YF1knWPueKMTay7rHmNBnyH+/ODLz64Rcw1iS19R8DLJPmcOcP9EDuMm5eV1mM++VxCTrllx5ze8wfvs+0O6GGB4T1BS2WVHPkPkkdXwA1yIUYYy+xUk+3hJUDZ9ZL7DGzCKYdBk/KPwwlAVRfZZH6LWJHaXWDN553ifUE6KmUT2i4l+SN1omRA1XGeCkwPrCFYOXjQspJB9LO5M1Cg3HqzTaK/4qaFRkyDvkC3CTXI6HyHA4sGE9odJ+JvP2J7iBeAZ/NuwKHgrAv6EEDV/eAblBOHIS+fJbVjIsuALj78lwiTTjQeyz0vLth/lY0lhceKlxmqPFo+vIVZftsrI531M6SMaN5YMFhkEJT5/fnsSwYsVF5LO4sVihEDwbkiUh+XDH8xky53FjoWa7UWEL4oEdUNY/W6CH0q/3Y0Q9oeT8YVk4UJQ+/YxtowpY+T7yXdgRtuw7mNloY1+uxIMwJiFzPtUegs4z/ptVP5GGHo3mcwDugi2KLyyzRe/oxRn3vk5Ca5eMWJeIPx8DGfaiTLA+JAQiszn4I5FphtPtnmXDa/MeeDJPuPFe0W7fDtZVNhGx4/Vz4PgAfkSExlqTjVAwFsmOQzK+0sK7uIiK1AqveUzm2zElSHfuf7FF1+YexoKNvIEMgjJh0BxVgvjjFcAMBQgr5E55A/KXGSkTz4efZhlH/lamTUojOx72Up7iTgW5saTTcawOwyGJNY5dszDZDDrEv3P7FeYZR95nu9aGuyj33XwMpbvUMAwAGIM8msEig9EGIMd65NPEFrmBmMKPhgxfPLuPj48dtACzi49Y+9lIJhQH8YoH93Hl5PvmhV37fdkH5xZ//lhLrL2w4vYsYakk3CHg5eguMJHMsk+7sB+jQdb5jlWfZ84Awkpp99ReOWa39nWJM+pggfxiyWuRPaLhXzK9SIUscBDlDMvlwprCgKRH8gvpJSFCEs0f/vQnCxWkOipU6cucakIdWFJQGDmmyifdmOBCXOngKRBJjmsg0DA7YeDrLiGFNr9gj5jPYe0Q5ZxR6FOBEncurDOI6ToT/DGXvpJ2EyEbzCyjycEfEc/2V7NVReCHKUCq0u2vLnwSjpfMsc4qIAyLghVBLJPYAGpQHlMmrLNO49lXLyCdTOuKB6ZY5C0fcovBNJAIJ+5zs4rxiIMOhgyIPDe5REyRkKG4gvPWpEpQ4IyN2kfKytTktaXTcb4kJOQRR+JLkoGJ603M3+ctTROHRw4Rm5nykzWWnbssfZnXu6FTGMngPUg102/lE057MTmWrcLuWZl9h3FEss+Y4MBCHJO34K8hTnMesnudNLEusPOBTtEmXfvJMErWG+2NSlp+6oqv8h+VSFbzcvlZWIhwKocDJ3IthkWgnJL/t4Ddjm8pR1SiBVeaUkEMg/oCiMhIASKjwAumOwukLwsg0T5s0jFb2HNa4HW0mRjGiT7CnecDLtsuUX2C4dljSuJ7VcOWGFRYPsXPzd8TssxYQliu48LmUgcbsMPN5elvRyxos+4xbCVnOuG43LFR/0WAsVCAN9srM24M3IGCz/uXFbfYrW1ptSrtTT+SLJu4MaEMc1fJBr/aeWMQkBkX3NDCAgBISAEhIAQEAJCQAjUUARE9mvowKpbQkAICAEhIASEgBAQAkJAZF9zQAgIASEgBISAEBACQkAI1FAERPZr6MCqW0JACAgBISAEhIAQEAJCQGRfc0AICAEhIASEgBAQAkJACNRQBET2a+jAqltCQAgIASEgBISAEBACQkBkX3NACAgBISAEhIAQEAJCQAjUUARE9mvowKpbQkAICAEhIASEgBAQAkJAZF9zQAgIASEgBISAEBACQkAI1FAERPZr6MCqW0JACAgBISAEhIAQEAJCQGRfc0AICAEhIASEgBAQAkJACNRQBET2a+jAqltCoDII7H3h3m6ppZaqTBF5PfvVV1+51VdfPa9nK/OQ6q0MevGf3WzhZq5///7xH1BOIVDiCIwePdrVqVOnxFvp3NSpU12LFi1Kup3VoY0AmFY7u3XrVrDxEtkvGJQqqKYisMsuu7gmTZos1r1Zs2a5l156qdp0GSG/8soru0mTJsVq8+Q9J8fKp0xCIAkClyx3iRs7dmySR5S3GiFQjrLyzTffdFtuuWXJj9K///3vkn/3qkMbGeg02nnvvfc6kf2Sf63UwJqEwPjx412tWrXc559/XtGtKVOmuGHDhlWqm2eeeaZbZpll3MCBAytVjn/4xRdfdCy2f/755xLlnXfeeW6DDTZwPXr0iFWXyH4smJQpIQIi+wkBq2bZy1FWiuwXbpKmQaIL0do02imyX4iRUhlCIAECLGATJkxwV1999WJPQdZ32mkn16VLF9e9e3d3yCGHuM6dO7tzzz3X7bbbbpaXZy+88EIj2jfccINr0KCBmz17thsyZIiVhxJxzTXXuDvvvNPyb7LJJu6qq65yv/32m2vYsKGbOHGi23zzzd1KK63kHn/8cXOB2G+//dwpp5zilltuOTdt2jSrl+ebN2/u5s2b57p27epuu+02t95667nvv//e9evXz+25556uXbt2tt287LLLOgTJFVdcEYmCyH6CCaKssREQ2Y8NVbXMWI6yUmS/cFM1DRJdiNam0U6R/UKMlMoQAgkQYAGDIM+dO7fiqeHDh7vJkye7p59+2gh9p06d3C233OIeffRR99BDD7mTTz7Z/f333+66665zO++8s7vyyiuNnA8YMMBI/wMPPOC22GILs+yfcMIJRu5JW221laNsyuCzAw44wEj+jBkz3LHHHuu23nprUwDuu+8+N2bMGCtn8ODB5p7z1FNPGdFv27at5T3iiCPcGWec4erWres++eQT+w5lYbPNNjMFZbvtthPZTzAPlLXyCIjsVx7DUi6hHGWlyH7hZmQaJLoQrU2jnSL7hRgplSEEEiDAAvbLL7+49957r+IpyDb/H3zwwe60005zc+bMcXvssYcdasV/E8v+xhtvbD8Qa/Kffvrp7tVXX3XrrruuKQ/HHHOMKQCQfZ8g+9dff73bfvvtXdOmTc0CDykn37PPPmtlr7XWWm7//fd366+/vikHN954o7vpppsciw55+f+zzz4z96AVV1zRdgtw79lhhx1cx44dXaNGjUyByOZnKst+ggmirLERENmPDVW1zFiOslJkv3BTNQ0SXYjWptFOkf1CjJTKEAIJEIjamqaIffbZx51zzjnmz8/fEHGs7SgCb7/9trn34ELz8MMPm7vNxx9/bC45WPRxAwoj+7j3QNqbNWtmZL9NmzZuhRVWMLLP3/jm//jjj+6VV15xu+66qxs1atRiZP/BBx90Tz75pLkDUQ+Hiw877LAKn30O6rIL4Ml+z549HT/BVGtArQQIKasQiIeAyH48nKprrnKUlYyVDugWZsamQaIL0dI02imyX4iRUhlCIAECLGC461x77bUVT+GiA5Hm8+eff95I99ChQ82Szgl6/OOPP/54d9RRR7m99trLXHfeeOMN899/5JFHjGxD5uvXr28uN0HLfjayT9kIASz1a665pvnqY8nnBwsTLkN9+vQxP3/ynn/++UbyCRXmD+hmkv0wKGTZTzBBlDU2AiL7saGqlhnLUVbKsl+4qZoGiS5Ea9Nop8h+IUZKZQiBBAjg8oLrSzB9++235gffsmVL16FDB9e3b1+z7B944IHunnvucQsXLnSLFi0ytx6s/OPGjbODuygIuATtvvvu5o/PQVvIPdZ5Em452cg+FiQWVJQK0vz58213AD99LP7Ui68+ysWqq65q5waog/pE9hMMurJWCQIi+1UCa8kUWo6yUmS/cNMvDRJdiNam0U6R/UKMlMoQAlWIAAQff3pcdiD3yy+/vPvuu++sRlxqvv7664racc9ZsGBBaLjMbE2EuE+fPt2eW2ONNdyXX35pddWrV8/98MMPoXUl6bIs+0nQUt64CIjsx0WqPPLVBFkpsl+4uZoGiS5Ea9Nop8h+IUZKZQgBIZAVAZF9TZCqQEBkvypQVZnFREBkv3Dop0GiC9HaNNopsl+IkVIZQkAIZEWg54ieFtIz7dS6dWsLaZp2Ur3pIH58m+NL/hbPdJBQLTUFAZH9wo1kGiS6EK1No50i+4UYKZUhBIRAIrLfcGbDVBBLQ4iGdUT1pjK8qVwzn05PVIsQ+C8Cb/bs6QYNHuzGNUxHRuaLe7FkXJL2Voc20p802imyn2TmFDkvftXcgkqIxGImLlHCl5sY72kmYsFzyZS/MCpYNxFhuCCKm2l9wredKDOEqSxk2mijjRz1xem/bzOW3q+++sri1edK3KLLrbhx8uYqK+739IfwmxzArYoUdONpMK+B2/DlDauimiXKTEOIiuyns1iVEs5xJ69kpWRl3Lni8705ebK7rUkTN2zNNZM+mmr+YsnWJJ2sDm0U2U8yomWSl9tLuSX1oIMOKmqPub21cePGFuc9zcSNslwk5Q+nBuvm0ijcRIgi4xML7YgRI9y2225b0GYSJWfttde2uPO5km8zl1S98MIL7pprrsn1iEXZef31102p4YIsIvJUVSKO/2233WYkHwWO6BcXXXRRRXVcoHXFFVdU/M8B4O+//94sEYQI/eabb+w7FBnCgkYlkf2qGsHFyy3W4lZu9cYdTclKycqkslJkP+7blTtfseRS7palL7ertWWfEIEQlfXWW88ISL9+/Sz+N59hBSds4CWXXGKWXUgXZAaSRqhC/OKIXU7kkkMPPdR+CEO4+uqruzp16rjhw4e722+/PXTMTjzxRLfjjjta1BJuLr3//vvdZZddZvViWSZUIpcfccMosdQhhRA3rPITJ060mOVchASx6t+/f2gdYX3jtlJP9rm06PDDD3e1atWy/l188cXW7jvuuMPwoL7LL7/cYrBzedKMGTPs5tOffvrJbmmdO3du0vlYkT+4gNF+bnclEcKRuO+EaVx66aUrwkgSsz1zPMIq57lp06ZZux977DHDivEbO3asXfLE+Pbo0cPIKFEXuE0Wknr22WfbWHuyz+VSWMcvuOACI9eQfS6PuvTSS20OfPDBB3bpE/gxB4gvzxhxeVVmYoxpF1FvmGNcJrX33nu7DTfc0OZUrrElbj5tZh5A9rnNNmyMyEffmZ+zZs0yIn3yySdbRJy33nrLtWjRwnXu3NnGD4wbNGjgevfubc098sgjDV/i8icdV9pFbH7KYN6MGTPGtW/f3qz8mQnlCSzYrSCmP0oJWPzxxx8555LIfk6ICpKhWItbudUbd7AkKyUrk8pKkf24b1fufMWSS7lbJrKfCCNIKxcIEQccq3fdunWNbHP5EP9DSiDHuHI89NBDbpVVVrHbST1ZJJb5oEGD3BNPPGGWasj5cccdZ6Qfgsb/Yem8886zi43OOussh0UZEs4NpRBrfiC3EDWIJCQcxYH6IeDEQofkQ75pO64nYSmsb++//771B8L50ksvGdGkz7SZPkPGUDLAA2sw/YaIYyUmhvuQIUPcsGHDTBnhd77JL2DgR7/oK6T7uuuus0uYILMQaIghRDBsPMLqhrQTF572Q34J/8gYTpo0yWLPc3EUFzsRNx63E6zeAwcONFypG7J/1113mYUZ8lq7du0Kyz6KCCQf7FEC+P+vv/4yCzUknzGCzGcmyiTsJcQaYgyWEHAUDfDPNbbkp81YxyH7P//8c+gYvfbaa0awqe/UU0+1G2sh9sccc4zdmIv70s0332wKJfHvUX64AIsEZlx2hUJCeE7axYVbYDRgwIAl+kReH06TvuMWxtxAgaK91MltvZkJBYx2gjnKEvlQNn/99VfDYfTo0ZFTSmQ/37ct2XPFWtzKrd64oyJZKVmZVFaK7Md9u3LnK5Zcyt0ykf1EGEF88GuGfHApEJZrXiw+8yQH8gJhhKh9+OGHpgRASohNftpppxkxx9LPhUVB1xT8sSHVH3300RJtguxDrJlIWOtRMFAQosg+1lyUgqZNm9ptpSgGWO6xuEddix3WN5QPyP59991nly5BXEkQekg3JBjFA0s2+XC54BkwwIrNrgfWaFwvwCHf5BcwFAvaT/2QX366dOlixBtijeWX3ZGw8cB6nZkYv5EjRxrxhKyDLQoV5aC4eOJMP7H6swNAPsg01m5ILuQTjNlp8W48YALWjBPKB+QYBYHdHXYA9thjj0goKB9lBfIM7pTD+Huyn2tsM8k+ClDYGEGi2blAIWAsIfszZ860OUrf6SsKFFigrLGzlHl2Aet+r169bBcDMhApOtoAACAASURBVM8z++677xJ9A1Ov2DCHUHbBlOTJ/pQpUxZ7rlWrVqa84SZFvX4nDMWBOtl1YI5H+fyL7Of7tiV7rliLW7nVG3dUJCslK5PKSpH9uG9X7nzFkku5WyaynwgjiAqkCJKHtRXLJhZGPodwkiBpWNNxccAyCbnH+gvxhyAGyT4uHljJSZAvSBHkLjNB9vztoSgZzzzzzBJkH+sx7jpY9nPdYBrW6bC+4asPiYekosC8/PLLFY/SNyzQRx99tBE2LMgQfU/2cbfAUo5F2Ls8JQEbqzs3t+IyBMHFjYRdEizDfI4lGPLvyb73T6ddYeMRdfgUJQvrNOPTqVMn9+mnn1p7sXYHyb4fS+pEmKLwMQ94DncUdhgg5PjsQ9ixgtMmn1B8UCbinIGgfPpFudxMixLlyX6usc0k+xDlsDFivrGbQ3/DyD7tZK6CP8R7v/32Cx0+iD67ELgZ0XewzExY8r3rDQSeucJZCNzA+I7dpkzSfuutt9pn3i+fd4UyuISL5xg3dsPAFYWbn2CqNaBWxb86oJvkzUuWt1iLW7nVm21UJCslKysjK12tWjqgm0zsReYullxK2vw02lmtffY9ocayi4URAo5FGWLEZ127dnV9+vQxSzoEKhfZJ2IKrjcQRdx7oiyVUWQfML/44gsjptT1+++/5032w/r2zjvvWN+wAGNlhgxDtiDcWHEhW7iCYImGlOIrWCiyj4Weuij7wQcfdLTl888/N5zBjM8hgrg3ef90/OUZl7DxiHoZ7r77bhvHk046ydycNt10U7NmY5XPRfZxgaEtkHos/1jA/QFdFCPOF+AehMIzZ84cwyoX2ScvPvMokShu6667rik3+ZL9bbbZJnSMosg+Y83uAwnXLYg1BJ2dH59QdNm5QaklAhGEHcWHBQcf/MyEosg8JTFuCBp2D1AS2AmgPt4ZMEL5IFEmyrHfTQIXFF3GnwPL7BTRt6gky35S8Z9f/jQWjbCWlVu92UZHslKysjKyUpb9/GRfKcmlpD1IQ35Wa7JPJBGIz6qrrmrE5pRTTjEXFSygEBF8tiGP+EtHkX3IIG48JE/uIVQQxWAUkuDgRZF9iC6HQ/F9xur57bff5k32w/oGIWO3AAsw/cbCS124JKHYYL2nfpQM/L0bNWpkOxz4tnvLPthgKU/qxoNlG+LuE9ZyDoNiaeZwLG5QtAUivNpqq9kBU0g/CkfYeES9DBx+hrhiXcYCjl844wKeUWSfqEAIV85iQPYhrOzK8MPnuDWBAcoQibkCMQZHj2dUe+g3ihd+6cwL3MOwnudD9sEEkh02RpBudm7Y8fCWfbBFkcNqzrkIXAJw38GVhoPlPjEuKKi8zLhpxTkw659ll4L6OKSLCxQ7JLijQerZQaJsFBzKRgnw9aKQ+QPs4Mk5DcZZZP+/CKQhvEtpcSu3/mZbzCUrJSsrIytF9pNS5ej8xZJLSXuQRjurNdn3gOK+A+ENJtwYIKP+IGIu8CFSRNbBog+h4TnINZblzIRLA2Q6LEEIcXEJO+wZlj9XHWF98+VA5jmEGTxXgJKDdRdSScQfiF+QGObCIdv39K1ly5aOg8JY+UkQfNyHcBfBwkzdYaExg+MBUcTHOzNhdafsqkq0j/bjj57ppoKrDkpjMM2fP9/IM5jSfm5iLUQc+iRjRJvBnTHkLANtRNEKJvIkIfhh+IILVvywOwyyjQeuTd76ny2fLPtVNasXLzeNRUNKRu6xlKyUrMycJXFlpch+7vcrbo5iycO47fP50mhnjSD7SYENy+/JPpZlnzjguc466yyRnagwhSLQadRRCHwKWQbhLDkkm5nw+w87uFvIuqPKQulCSQsmrPmcCSmFhOUcpYpIRWGHxkuhjSL7/yCQhvAW6S7eDkpa75tkZXKkq7usFNlPPuZRTxRLDiftQRrtFNn/36jgzkCKstgnHTzlFwKFRAC3Lg5Y+x2VQpadRlk9R/S0sKI+NZyZzlXwaQhRke7ike5ijW8a74zqyA+B6i4r3+zZ0w0aPNiNa5iOjMwP5eK980naW13kQxrtFNlPMnOUVwgIgbwQENnPC7bED6WxaEi5STwsekAIxEbAk/3YDxQpIwFNcG0t1YSyVCx5mBSTNNopsp90VJS/5BHgHAGXihGRh4hBJM4JEGrz3XfftbMYHH5NkjjMy2HZpD7tSepIIy/nBTjgm7T/lW2bfPYri2C859NYNET2441FdcglWRk9SsWSlbjxKFUOgY/q1XOHNG8ush+AUWS/cnNKT5cgAsSgJxIPITN9vHeiDxFmk/CZPhxnrqYTNYfDr0SoIRKQj2iU67lS/p7IOsTUJzpRmklkPx20RfbTwbmm1CJZGT2SxZKVIvuVf7tE9pfEUGS/8vNKJZQYAtxBQIx4bvcl7CYRdHKRfUJM+pt0CTvpLwIjHCX3BRD6khtt1157bYvUhKWf0KooE4QKJR/WcpQJwqIuvfTSFvmHEGw+Ug67C3zHQeDZs2fb7gO3KvMZUZWI+8/dAnXr1rU4+ewisBvBDgUhQomuRLu4L4CL4aiTBYnyCTvL3Qd8x43GpPHjx1vo0WB7uDSOEKWEB4T0E6YU6x6CgEPqfM6lQLSfKDuEKyX8KSE/OSxIxKr777/fbigOez5qKojsp/OSiOyng3NNqUWysvRkpch+5d8ukX2R/crPIpVQ0ghAmp944gnXoUMHI8aQZha0bGR/lVVWsUu7uGUZIk58f0KDQoqx7GMFf/HFFy1SECSXC8y4rIqLpbjoijog6BBp4uWzq7DmmmsucfkVOwrLLbecxeqHgNMu/B65lwBSzue4HXFhFpd4Ef8ews+dAPRlxowZpgxwB4GPEkSd3GQMOWcngmdQTFBwuGOB+PtEp/DtIbQpt/7uvvvudvMzF6FxoA2Cz0Va9JMfbo72txI3b97cLkujX9x50LFjx8jnRfb/i4BIdzpiolg4p9O7qq1FsrI0ZaXIfuXnvci+yH7lZ5FKKGkEIPVY6LnwiZtdCZ8JIchG9rHOY9GG0JKw0EOuia0POYfs48bDZWooEpBxdg74rG/fvhW3ymJRh2xzkyw3OXPzbzChGGAN55IsLPJYyVEYKIMbankO63qvXr3c9ddfb8Qayz9Wd4g4bYGEs2MB2acuCD1knLsfyMN3tINLv/ihX+Tx7SHkKWSfm5W5aRh8UHRQCGjPuHHj7NIwkr+VmjsUCBELjuw0kJ88mc+z0yGyL7KfpoAQ2c8fbcnK0pSVIvv5z2n/pMi+yH7lZ5FKKGkEIMOEp+QWYdxcGjdu7Nq3b28W9yiffSz43KfATbwkbpCF1EPIg2Tf++zffPPNbt68eUb4999/f/fyyy9XYPLYY4857mqACOP+4xOuMq+99prVAXnGJQdy/eijjxqJxmoPWWfXALIPIed/biCG7EOuCQ8bJPsoHaNHj7Z+oTTgf8shYm4x5s6C7t27V5B93x5P9imb/rEzwE4BOxhY97lUju9IuASh8KAo4IKESw+H1tgRQKnIfN7fQI3y5M9K+P7XGlCrAosG8xq4DV/eMJV5VCwyqHpTGd6i7aCk07uqrUWysjRlpav1j6ys2hlQc0sX2RfZr7mzWz0zv/aRI0eaRdz7yeOSMnbsWFevXr1Isg+RxhUHKzg31I4aNcpI/Kmnnurq169vrjPBA7qe7F977bXmitOpUydTMCDakHV83DPJPsPDjsEbb7xhLjsoFFzOhrsRln5i0A8bNswIPeXGIfvTp0+3tnFOAGWFOrt16+batWtnyg0uOuxWUF4m2efGYLDCnQeXH3YWUA6w8LOT8eGHH1p/6B8KUybZp57M5ykz6sIv+eyn84JKyUgH5+pei2Rl6cpKWfYr/3aJ7IvsV34WqYSSRQCyjP87Pu4+4RbTokUL99xzz5kycMkll4RG48FCzg217AZMmzbNHXTQQUZ+OfgK8eZ30LKPywrb4Pjet2rVyg65fv31165r166mXDz//PNGsoMJX3dce7Ci//LLL0a0cbnBol67dm2rGzcfXGvikH0Ug0WLFtmzvXv3tnajUCxcuNA+p01Y+cHEtydo2SdvkyZNbBfE71LQFvpAWzgvQJ/ZqQiz7Ic9HzU5RPbTeW1E9tPBubrXIllZurJSZL/yb5fIvsh+5WeRSqixCBBtBuLsY/PTUQj1ggULst5c26hRI4umE2XVzgQMgo1iEEz4xGNN9zsSuUDGZ//GG290U6ZMcVj4/c26EHyUFlyFUCqWX355991330UWR71z585drD3sgrBTgPsOVv9sKez5sPwi+7lGtDDfi+wXBkeVkh0Bycr/4lMVslJkv/Jvn8i+yH7lZ5FKEAIlgIAn+7jZVIcksp/OKInsp4Ozaqk+CFQ3WSmyX/m5JbIvsl/5WaQShEAJIEBUnJ9++in2TkCxm9xzRE87l+BTw5kNU2mSyG8qMBftoGyxxjcdVFVLIRCobrLyzZ493aCArCwEBlVRBmGjJ5fwbb/jGjYsmlxKincackyXaiUdFeUXAkIgMQKciahTp07i5yr7wNSpU+3MRtpJ9aaDODgTNUpJCNQUBIolK5PiVywZl6Sd1aGN9CetdhJYo1CpVuvWrRcVqjCVIwSEgBAQAkJACAgBISAEhEDpICCyXzpjoZYIASEgBISAEBACQkAICIGCIiCyX1A4VZgQEAJCQAgIASEgBISAECgdBET2S2cs1BIhIASEgBAQAkJACAgBIVBQBET2CwqnChMCQkAICAEhIASEgBAQAqWDgMh+6YyFWiIEhIAQEAJCQAgIASEgBAqKgMh+QeFUYUJACAgBISAEhIAQEAJCoHQQENkvnbFQS4SAEBACQkAICAEhIASEQEERENkvKJwqTAgIASEgBISAEBACQkAIlA4CIvulMxZqiRAQAkJACAgBISAEhIAQKCgCIvsFhVOFCQEhIASEgBAQAkJACAiB0kFAZL90xkItEQJCQAgIASEgBISAEBACBUVAZL+gcKowISAEhIAQEAJCQAgIASFQOgiI7JfOWKglQkAICAEhIASEgBAQAkKgoAiI7BcUThUmBISAEBACQkAICAEhIARKBwGR/dIZC7VECAgBISAEhIAQEAJCQAgUFAGR/YLCqcKEgBAQAkJACAgBISAEhEDpICCyXzpjoZYIASEgBISAEBACQkAICIGCIiCyX1A4VZgQEAJCQAgIASEgBISAECgdBET2S2cs1BIhIASEgBAQAkJACAgBIVBQBET2CwqnChMCQkAICAEhIASEgBAQAqWDgMh+6YyFWiIESgaBvS/c2y211FKpt+err75yq6++uuqtYgQKgfOBGxyYuJVTp051/fv3T/ycHhACpYrA6NGjXZ06dUq1eRXt4t1r0aJFSbezOrQRANNqZ7du3Qo2XiL7BYNSBdVUBHbZZRfXpEmTxbo3a9Ys99JLL1WbLiPkV155ZTdp0qRYbZ685+RY+ZSpPBFY+rel3WZPbZa48//+97/d2LFjEz+nB6oHAuUoK99880235ZZblvwAVYd3rzq0kYFOo5333nuvE9kv+ddKDaxJCIwfP97VqlXLff755xXdmjJlihs2bFilunnmmWe6ZZZZxg0cOLBS5fiHX3zxRcdi++effy5R3nnnnec22GAD16NHj1h1iezHgqlsM4nsl+3QZ+14OcpKkf3CvQtpkOhCtDaNdorsF2KkVIYQSIAAC9iECRPc1VdfvdhTkPWddtrJdenSxXXv3t0dcsghrnPnzu7cc891u+22m+Xl2QsvvNCI9g033OAaNGjgZs+e7YYMGWLloURcc8017s4777T8m2yyibvqqqvcb7/95ho2bOgmTpzoNt98c7fSSiu5xx9/3Fwg9ttvP3fKKae45ZZbzk2bNs3q5fnmzZu7efPmua5du7rbbrvNrbfeeu777793/fr1c3vuuadr166dbTcvu+yyDkFyxRVXRKIgsp9ggpRhVpH9Mhz0GF0uR1kpsh9jYsTMkgaJjtmUrNnSaKfIfiFGSmUIgQQIsIBBkOfOnVvx1PDhw93kyZPd008/bYS+U6dO7pZbbnGPPvqoe+ihh9zJJ5/s/v77b3fddde5nXfe2V155ZVGzgcMGGCk/4EHHnBbbLGFWfZPOOEEI/ekrbbaylE2ZfDZAQccYCR/xowZ7thjj3Vbb721KQD33XefGzNmjJUzePBgc8956qmnjOi3bdvW8h5xxBHujDPOcHXr1nWffPKJfYeysNlmm5mCst1224nsJ5gHyvoPAiL7mg1hCJSjrBTZL9y7kAaJLkRr02inyH4hRkplCIEECLCA/fLLL+69996reAqyzf8HH3ywO+2009ycOXPcHnvsYYda8d/Esr/xxhvbD8Sa/Keffrp79dVX3brrrmvKwzHHHGMKAGTfJ8j+9ddf77bffnvXtGlTs8BDysn37LPPWtlrrbWW23///d36669vysGNN97obrrpJseiQ17+/+yzz8w9aMUVV7TdAtx7dthhB9exY0fXqFEjUyCy+ZnKsp9ggpRhVpH9Mhz0GF0uR1kpsh9jYsTMkgaJjtkUWfYLAZTKEALVCYGorWn6sM8++7hzzjnH/Pn5GyKOtR1F4O233zb3HlxoHn74YXO3+fjjj80lB4s+bkBhZB/3Hkh7s2bNjOy3adPGrbDCCkb2+Rvf/B9//NG98sorbtddd3WjRo1ajOw/+OCD7sknnzR3IOrhcPFhhx1W4bPPQV12ATzZ79mzp+MnmGoNqFWdhkhtTRkBkf2UAa8m1ZWjrGRodEC3MBNUZP8fHGXZL8ycUilCIDYCLGC461x77bUVz+CiA5Hm8+eff95I99ChQ82Szgl6/OOPP/54d9RRR7m99trLXHfeeOMN899/5JFHjGxD5uvXr28uN0HLfjayT9kIASz1a665pvnqY8nnBwsTLkN9+vQxP3/ynn/++UbyCRXmD+hmkv0wIGTZjz09yjKjyH5ZDnvOTpejrJRlP+e0iJ1BZF9kP/ZkUUYhUGgEcHnB9SWYvv32W/ODb9mypevQoYPr27evWfYPPPBAd88997iFCxe6RYsWmVsPVv5x48bZwV0UBFyCdt99d/PH56At5B7rPAm3nGxkHwsSCypKBWn+/Pm2O4CfPhZ/6sVXH+Vi1VVXtXMD1EF9IvuFnhnlW57IfvmOfbael6OsFNkv3Lsgsi+yX7jZpJKEQBUjAMHHnx6XHcj98ssv77777jurFZear7/+uqIFuOcsWLAgNFxmtmZC3KdPn27PrbHGGu7LL7+0uurVq+d++OGH0LqSdFuW/SRolV9ekf3yG/Oq6HFNkJUi+4WbGSL7IvuFm00qSQgIgZwIiOznhKisM4jsl/Xwq/MBBET2CzcdRPZF9gs3m1SSEBACORHoOaKnhfRMO7Vu3dpCmqadVG9yxBvObJj4oeqymCfumB4oWwRE9gs39NVFPqTRTh3QLdy8UklCQAiEILBwuYXusccfc3vvsnfq+KQhRMM6pXrTGepi4ZxO71RLuSGw9KJFFhVtyyx3lpQKJtXh3asObWQ802inyH6pvDk1oB3EiCdMpL/QKY0ubbTRRo5oMMSbD0tEk8HXHR90n4g5/9VXX1ns+EImYtVzCVau/vs2c9CWKDiE0cyVOEDLodk4eXOVFfd7/F+pl1tzoxK38nJ+gIO7UQmyP+qBUe6IzkfErbpg+dIQoiL76SxWpYRzZSeoZKVkZdgcWubvv93Lkya5LXfcsbJTrMqfL5ZsTdKx6tBGkf0kI6q8JYEA24/77ruvmzVrVmrtIWLN2muvbTHgwxJhKUePHu3uvvvuiq/vv/9+98ILL7hrrrmmoO3kxlsuuvKHZ6MK920mzv2IESPctttum7MdXJxF2VyYddddd7nbb7/dYt9XVSLEJhF3OOwL6T/yyCPt1l2fOMR78803m2JDuE8i91x00UWhzRHZr6pRWrLcYi1u5VZvZUdUslKyUmS/sm9R7ueLJZdyt2zxHGm0U5b9pKOi/JEI+AWM21yJIb/KKquYVRiiCCEkBCShHLGoQ7QJ54hVngudiA3/6aefLlH2hhtu6EaOHGnx3onzDtnlbyzixJ0nvjx5INpXXHGFlU9YS6z5hx56qMWgh+w/88wz9psfbqaF7A8bNszaSfjJ33//3Q0aNMg98cQTFut+2rRppkR06tQptL8QbxQbiDAv0ZVXXmnP9ejRw0gvUWx4nrCY4EKcfIgzbeJSLNp82223VZD9/fbbz0JaEvaSurkwi4upeI7499QB+X7uuefcQQcd5P766y/3n//8x33zzTfu7LPPtgg9EyZMMIz8LsZll13mZs6caRdk/fHHH7FnLlF4Xn75Zde5c2c3d+5cU5TmzZvnTjrppIoyCPvJRV3kQXnp169fpNIish8b+kpnTGPRCGtkudVb2YGSrJSsFNmv7FuU+/liyaXcLRPZT4qR8pcQAn4BGzBggKtdu7ZZoiHTX3zxhRFbCPm7775rBzX5brXVVrNLocjPjbEnnnhiaG9ee+01I/aQX4j5ySefbAQTgos7zsYbb2yW/eHDhxuZ//DDD93AgQON+KJgcAMsJPujjz5yxx13nPOWfdxoIObcPMtFVTvuuKMRVnwmUUBoJxdcZSbCW6I8cMEVN9xCtiG+r7/+ekWdKDrchEv5KATEzffKBGSaNl966aUVZH/ixInuvvvuc2PGjDFXKOom5j7Civ9pE89z2RYKjI+9jyJFu1Eyjj76aLfTTjtVNJc+H3PMMaZATJo0yZQhxoLbbzlAGkyM3R133GEfbbHFFqaIbbPNNvY/SgOhP1FufCJUJzsMKAOMI+WfdtppoeMnsp/eS1qsxa3c6q3siEpWSlaK7Ff2Lcr9fLHkUu6WiewnxUj5SwgBv4DhxsIFT5DVNm3amKUayzduH5BpYrm/9NJLRoAhsZBKvocwhyVIMD75WJGxjmN1huBisaYOT/ZRLDxBhTyjSKAYNG7c2HzKKZ/fnuxjXeeHtpAg/BBhlBIs1Vj/wxLkne9+/fVXs6ZDkrkUK0j2UTjOOOMMUz6Igw8R5lZaLP3gk0n2URrw+YdUcxEWeVEYIO/sRAQvx8J9h90OzgfQDi7eoq+zZ8+2dmcmdkJQctZbbz3bVYGYt2jRYrFs3MbL+JDYNUGx8O5FKDMoWUGyf/jhh9suC8rbOuusY2O62267iez/D4FiLTKqt4QEYpamSFb+Q/YlK/+ZKPLZL+z7Wyx5mLQXabRTbjxJR0X5KxDAvQSSiXXZu3507drVrNL4mEOiIYL4dUPmvX86ZBlrPRMcP3BI+OWXXx5J9vv06WNKA4SYMnCDadq0qbm48J0n+/6mWBrofcgh+9w8i2sPxBv3IU/2d9hhBzuAioXfJ6zf3E7LAbowtyKfj/5BoLfeemvrO4d+UUj8bsJjjz1mhB3rN4sZOEWRfZ6lvewmoPxgvecGXMg+VnbcdsLIPi8vVv+33nrLMMGKj2tPZgJvFAna26BBA3fWWWeZu1MwsUOC6xOJnQvOE4A5yhGYkMDaJzBiNwbXIw7pehcilAl2YPjx6cv5X7p5v87TAd0UZEcai0ZYN8qt3qRDKVkpWRlHVrrffnNu0SId0E36gkXkL5ZcStr8NNopsp90VPLIX9noL82bNzdXjLfffjuP2qvuEQ5pEh0Gi3urVq3MZQVSCmGFeGORxw0G0gxhDB5G5TnIMYoBvvMQTFxRwhIWadxb8EWHRGLRxjqOJfm8887LSfYh+ZBc3G46duxoFmws4ighe++9t1n0URgg4x06dDC/+GxkH6KPZR0fesgw/cVl5s4778yL7OOfz4uIRR1SQDm0hfMMUWQftyj6RZ8g+ZB3djuCCRLOAkOf8PEHt+uuu87cchivYCIWfdAnn90Txsy7F4EZh4JRKlBM+vfvb21lV2HTTTe1XRaUp7CzAXLjqbp3MLPkNBYNkf3k4ylZKVkZR1bKsp/83cr2RLHkYdJepNFOkf2ko5JH/lzRX3JFV8H/HMs0ltlSSw899JBjIatVq5ZdXsRB2yFDhpj7CQQT95YmTZqYXz6HOr1rSJcuXczXHd9+nsWHHxIZlSCfHIAlSgw7Bvzgax+H7IMvPu5Yq7Fe42OOEgKphjBjQa9Tp44d9sXqza4D1vRsoTlRVugXB3vnzJljlvOgG0+YZR9lBzceDi1nuvGMHz/edhlI8+fPN+Xu8ccfd5tvvrkpHkHLPuVA1jm3wG4AyglnCMAjmLD6k1BMKCtJ4owEOzYk+odChGKDu0/v3r3trATkn10NEgebGZuwJLKfBPnK5U1j0RDZz2+MJCslK3PJSpH9/N6tqKeKJQ+T9iKNdqZC9jfZZJPQSCkQLPydsdxiZcWVg8OHkDKsuBwCxCqMGwZ/E1kEX3DIGZbG1Vdf3aKEYO3FpSAs9erVy6Ki4DONJZL62rdvb3XgQkLbfvrpJyNUELywvFhGfcSTqVOnOvyVsaziugCJ7N69u1k1IbpYPL3vMgQO941sZB/XCB9dBZ9oLLX4aJMgaJBkT/bxkYagQvCwwoZFkgnrFzhz4BJyCtHELx0/8kIl3Gvw2Q7Gsgfrn3/+2dqKogLpD4vFvtlmm5mbCxZhrOOrrrrqYs2C+OJDX5WJ+Yf/fbD91AeRhfRmJtoLuaXtHFAtFJbM8enTpxuWKFCZ7Qm2A0ypm0PKjDnRfDLfAdqfJApPZj9XWmklU4w++eSTSPg580Co1Wx3C4jsV+XsXbzsNBYNkf38x1OysjDrTk2VlSL7+b9bpSSXkvYiDbmdCtnHKhkWKYWDgxDlI444wtwBcGOAKGMhhYjjynDJJZcYySdyCS4IkGHIMSQdIk3EE0gkVs6wBFFu1qyZuYjgfkEYQfysqQMSgxUaNxJIOwQqLC+kG0sw1lpIM64RlItiQhvxC8d/HAKO9Yb/aRPtpY8oGFFx3QlJ6aOrYImlrqFDh5rbCdFVwANFhbZh7f3Xv/5lVlb6EBZJBteYWmMW1gAAIABJREFUzH5BHDn4iUUWBYG+g2epJcaffgcTJLwq48lnwwAlyStewXyQavzki53YPeH9eOedd+ywbKkmkf30RiaNRaOUFtVi9Te9EQ2vSbIy2QhUF1kpsp9sXHPlri7yIY12pkb2wyKlQKDxoca1A99k/LaxkENGcfmA5OEPDamHBOPmATk/9dRTzeqJSwZElmd9FJbMwc9G9rHQY6nHcky8clwywsg+3+N2gQUVl5APPvjACHom2ceVg6glEHTy84O7CqQ/2yVOProKllTyQ+ZJuIrQb/qIfzgHLWkzB0pRUMIiydx6662WJ9gvQk5iocYFAz96+qFU/RFA+WMnhHCapZwg+489/pjbe5e9U29mGkJU5Fc36KY+sVVhIgSqi6xcetEi4zxbRkSmS9TpKs5cLNmapFvVoY30J412pkb2wyKlQKyxXkPucfmA6Huyz8FJCD2+2rjiQFQ92ceaj3uPd58gHCO+22EpSPZRKojH7i37vg4OF+LegiuCJ/vBvFjfsdpjHSCMI7sEWMY92b/gggvMtxrLOdZ5yDiHadm1SEL2cQsheg0HNkmUj7sIVn1ciX744Qdzi2Fng4ELiyRDTPnMfhGOERcZ2oLLCpFeGA8lIZAWAj1H9LTD2A1nNkyrSqsnDSEqsl9+OKc6iVVZWSHwZs+ebtDgwZF9HtcwXRka1ZBiydYkk6E6tDGtdaqoZB8LNOQVqzMEFKIdh+wzgBwQJI45z3LLqbeGZ04EfOLZPSBiDP70hDWMIvscRAzLiyWcerCQ44fPQU52ErCossuAew+7AnxO6EVcesiPyxHtYlcjl2WfMwDEo0cZgZRjsQUTdguItgLhJ3wlB0oh77jlhEWSYbckk+wT1QW3E1ypLr74YnNn4uxDoRPnHlB2svlvF7rOjTbayKLW4L4UlnCjYgco6P9e2ehIUX1grIh4k6v/vs0ohWzHs4OTK6HYtW3bNlbeXGXF/Z6dJOpFEY5KYM8ha85kZEuT95zsVpy7olv/tfXjVl+QfMUS9qq3IMOXs5Bi4ZyzYTkySFZKVkZNkTcnT46cPcPXWMPdvPrqlZ1+BXm+Orx71aGNZUH2OVxIBBGIModkIbS4vOB/HGXZJ2wg5ANfZXyq+cF3HV/5sIQlG5KLiwy7B1jpM8k+fveUye+wvLgR4UbDDgIEyN+2Stv5n0OQkB2i5UDcCIPIoWK+g9D56C+Q/rAUjK6CmxJEngg1kH+s+OxOUAYKBGWwA0A8+7BIMuySeOx8vwifyO4D/u/gxaFfdgwKnfxFMRzYTCtxVoND1ShJYYndEXDirIZPuaIj5dt23L2IKkRY0GzJtxklMhiONNszKGiUzbuRK3pTvu0PPkdkneCcJzIP52h84t0F02WXXdY+4lwNSmlUEtkvxKjkLqNYi1u51Zt7JLLnkKyUrIyaISL7lX27/nm+WHIpaQ/SaGcqlv1sHcdyCHHASg4hhzhDquMkXGdw54HEYyEl3ndmwofdKxJEL4mTfKSTYF4OxtarV8+s+z5BnDlQGrR8QvAh61iTiYZC3yB/uSK7BOvERx9raVSEoWC7oiLJZPYTnPHxJzxmWFScOLjkyuMXMAggSglRk8AGooiShesQihAhLbncisPOWIbZ3SGSUdglVrSZ0JFY6Nk1gezyN+ON8sNuCnkg2sSFp3ywBH9clzzZJzQlpJ8fDt2y08K5h7CIRoT4RNFCiWDXKCxBvImexHjzErHrwnO4eKF8Mld4nlCb4MJuD/OaNhGrnjazm+PJPtF0OPiNbyl148pFlCmeI7IRdUC+CbPpozdxgRZnTXykKH+xld/FQDGGhOOmliQqj78gjd0w3hlIPVGvgnH4cZvjfWNMwODEE080RZlIQmFJZD/X21OY79NYNMJaWm71Vna0JCslK6Nkpch+Zd8ukf0wBItO9gs1rN7fPbM8yGBc5aFQbQkrp9QjuxSi734BY+cAZQVCDJlGkYPYQsjfffdd893mO3Yr2DUhP4oNhDEsEfceYg/55QA00Y6wjqNI4c7lb9ANi/iEgsE5Bkg2ihq7L96yz65LWEQjDkihgNBO3KIyEwfJUR5w0yKUHmSbW4CDcfZRdDhbQvmQ4b59+7pBgwbZjgpkOjPOvr+8isvDcIWi7pYtW5rPOf/TJp7HDc1Hb0KxQZHiMi2UDM6/cK7EJ/qMtR0FYtKkSaYMMRaEXm3duvVi3WLsiBpF4sA7ipg/9I7SgAKLcuMTChhud2CBIoFSTTjcqCSyX4g3LHcZ5Ua6i9Xf3CORPYdk5esVFxBKVi4+V0T2K/t2iezXaLJfuOmhkvJFwC9g7GRwJgCy2qZNG7NUY/mGDHLwGYsGl2JBgCGxkEq+hzCHJSII4ZOPpRnFjUPbEFws1tThyX5YxCcUAw5Vs5tB+fz2ZB/relhEI5QSzkVg/Q9LkHe+wy0KazokGTeuINknFj9nSthJ4CA54VNxq8LSDz6ZZB+lAZ9/SDWhasnLIgh5ZycieKmWj97E+QDaQdhalKbZs2dbuzMTOyEoOewCsasC8W/RosVi2TgA78k6uyYoFv4CNJQZlKwg2R83bpzdc8GZFc5AQLrIH7VrJLKf71uV7Llikd9yqzfZqCyZW7LyH7IvWSmyX9n3Ker5YsmlpP1Jo501xrKfFFzlrzwCuJdAMrEue9cPzhJglcbHHEJPdCEOrELmvcsKZBlrPRMcP3BIOBeqRZF9DlmjNECIKQM3mKZNm5qLC995sh8W8Qmyz/kJXHsg3rgPebLPYfCwiEYQWQ7QhbkVedToHwSa8x/0HcKLQoJbC+0Iu0E3iuzzLJGm2E1A+cF6z+FsyD5Wdtx2wsg+Ly9Wfw5fgwlWfFx7MhN4o0jQXtzOuCWYXYFgYocE1ycS1nrOE4A55J0dARJY+8S5ASz63JFBQtFhZwalgR0YfoKp1oBaOqBb+VcuZwlpLBphjSi3enMOREYGyUrJyriy0tWqFTm9dEA32ZtXLLmUrJXpRDMT2U86KspfgQBnCziQjMW9VatW5rICKYWwQnixyOMGA2mGMAYPo/Ic5BjFAN95CCauKGEJizTuLfiiQyKxaGMd5z4DfMdzkX1IPiQXtxuiLWHBxiKOEhIW0Qi/+GxkH6KPGws+9JBh+ovLDLcb50P28c/nRcSiDimgHBQDzjNEkX3cougXfYLkQ97Z7Qimhg0b2jkAwrb+9ddfhhsH4HHLYbyCibMcQZ98dk8YM+9eBGa33367KRUoJhB7sORMA9GFGFuUNfns/xfVYi0yqrc0BbRkpWRlXFkpN57CvcPFkodJe5BGO0X2k46K8i+GAFGQWMiIHgRh5KAtEYVwP4Fg4t7SpEkT88vnojTvGkJ4UdxD8O3nWXz4IZFRCfLJAViixLBjwA/RkOKQfazQ3JCMtRrrtY+OBKkOi2jErgPWdFxeohLKCv0ikhQhW7GcB914wiz7EGLceDi0nOnGQ0hXdhlI8+fPNz97LlTjEDqKR9CyH4zexG4AyglnCMAjmLD6k1BMKCtJ4owEOzYk+kcEKhQbLPfcJ8GtvZB/Dq571yh2Z6KS3HiSoJ9/3jQWjbDWlVu9+YyQZKVkZRxZKbKfz9sV/kyx5FLSHqTRTpH9pKOi/EsggHsNFt1gLHss1ERJgoziQgPpD/Pn5mA1Ppu4hGAd536BYIL44kNflSkqolGuCEq0nWg1/nK3yrZxgw02cNOnTzcsUaCCeGaW7aM3cUgZJYZoPpnRm2h/kig8mXUQHQvF6JNPPonsGu0EgyiLvn9QZL+ysyPe82ksGiL78cYiLJdk5df5gxd4sibLSpH9gkwRK6RY8jBpD9Jop8h+0lFR/ipDADcW3G2CiUOwHEgtRir1CErsnnDfBFZ2QpKWchLZT2d00lg0RPbTGctstUhWJhuD6iQrRfaTjW223MWSh0l7kEY7RfaTjoryC4ESQQBXH3ZCCKdZ6qnniJ52PqPhzHSvek9DiIr8Fs+CVqzxLfX3Te1bHIHqJCvf7NnTDRo8OHIIxzVMV4ZGNaQ6vHvVoY1p7UCI7EsqCgEhUOUIeLIfp6IV5q7glvl1mThZc+YplrBXvTmHpiAZioVzQRqvQoRACAK5yH6pgMZdLZzTK3bKpvxUF/mQRjtF9os9U1V/wREg1CSx54kk4/3Y8fEkOg2XerEFzgHbJIlDshx4JYJPdU4cAm7btm3i/le2z7jxxE2bPr2pyH5csDLypbFohDWt3OrNc3hK7jHJyughKZaszObGU3ITqMgNer9+fXfYxhtHtqJYcikpLGm0U2Q/6agof8kjwGFVYskTf97He+eCK8JYXnzxxYuFAM3WGaLbcMh14MCBFmmHMJ3ZDs2WPDDO2f0HRD5K28dfZD+d2ZHGoiGyn85YplGLZGU0ysWSlSL78We+yH58rET242OlnNUEAeL7Ey6SW1/9ra65yD4hPbmplkSYSn6ISU9YUC7i4nKumTNnurXXXtv98MMPFg6TCEMoE4cffrjlY7cAZeKGG25wSy+9tN3uy8EwHxGH3QW+4xAyt92y+8DlYHxGTH0iFxG6tG7duu6qq66yXQR2I9ihIAQn0XFoV//+/R3hRKmTBYnyiafPQV2+4/4BEuE8ufQs2B4u1uI+BEKfQvq5FRfrHoKAi898SFTaz/0I3H1AHH9i+BN5Z9lll7VLyYjVH/Z81BQR2U/n5RHZTwfnmlKLZGXpyUqR/fhvl8h+fKxE9uNjpZzVAAFI8xNPPGEXSUGMIc0saNnIPjfVTpgwwQ0dOtSI+NFHH20XhUGKsexjBecOgFmzZhnJJZ48l3zddNNNFu+fOiDoEGni07OrQOhRSDaXX/lEfHwOinHnAN/RLvweCW8JKedz3I54Zvjw4Y643BD+Aw44wPrCbcMoA9za6yMUUec555xjygU7ETyDYkKYU5SVnXfe2d1xxx0V7Xn//fftwjMuBCM2P5dyEQ6QOw+4FIt+8kM4T3Y06Fvz5s0tzj794rZfLtOKel5k/78IiHSnIyyKhXM6vavaWiQrS1NWiuzHn/ci+/GxEtmPj5VyVgMEIPVY6LnwaZ999nGE7oQQZCP7WOexaENoSVjoIdcbbrihkXPIPm48XOCFIgEZZ+eAz/r27VtxYRUWdcj2Ntts4z744AO7RCyYUAywhnNzMBZ5rOQoDJTBLbw8h3W9V69e7vrrrzdijeWflxQiTlsg4exYQPapC0IPGb/11lstD9/RDi7t4od+kce3x1/OtcMOO9gtwuCDooNCQHvGjRvn2rRpY81mt4PdCC4i404BcGSngfzkyXyenQ6RfZH9NMWEyH7+aEtWlqasFNmPP6dF9uNjJbIfHyvlrAYIQIa54ImbbXFzady4sWvfvr1Z3KN89rHgr7POOu6QQw6xHj7yyCNG6iHkQbLvffZvvvlmN2/evIqbc7nd1yduzj3xxBONCOP+4xOuMtzMSx2QZ1xyINePPvqokWis9pB1dg0g+1jf+b9Zs2ZG9iHXK6ywwmJkH6WDG4DpF0oD/rccIn7vvffc22+/7bp3715B9n17gjfx0j92BtgpYAcD6/6gQYOsXhIuQSg8KAqQflx6OLTGjgBKRebzKCfsKKA8+bMSvv+1BtSKPXt0QDc2VEtkLBb5Lbd68x+h0nlSsrI0ZaWrFV9Wls5sKk5LRPbj4y6yHx8r5SxxBPBrHzlypFnEvZ88Liljx4519erViyT7EGncVbCCE7d+1KhRbv/993ennnqqq1+/vrnOBA/oerJ/7bXXmitOp06dTMGAaEPW8XHPJPtAx47BG2+8YS47KBRPPfWUuRth6ScG/bBhw4zQU24css9tu7SNcwIoK9TZrVs3165dO1NucNFht4LyMsk+txWDFe44EHR2FlAOsPCzk8GtxvSH/qEwZZJ96sl8njI/+uij0Fkin/10Xp5yI93F6m86o1l1tUhWlq6slGU//rwX2Y+Plch+fKyUs8QRgCzj/46Pu0+4xbRo0cI999xzdsiVG2fxnefgbjBhIV9//fVtN2DatGnuoIMOMvLLwVeIN7+Dln1cVtgGx/e+VatWdsj166+/dl27djXl4vnnnzeSHUz4uuPagxX9l19+MaKNyw0W9dq1a1vduPngWhOH7KMYLFq0yJ7t3bu3tRuFYuHChfY5bcLKDya+PUHLPnmbNGliuyC4JaHg0Bb6QFs4L0Cf2akIs+yHPR81RUT203l5ikV+y63edEaz6mqRrCxdWSmyH3/ei+zHx0pkPz5WylnDESDaDMTZx+anuxDqBQsWmOU+KjVq1Mii6URZtTOfg2CjGAQTPvFY0/2ORC6o8dm/8cYb3ZQpUxwWft8+CD5KC65CKBXLL7+8++677yKLo965c+cu1h52QdgpwH0Hq3+2FPZ8WH6R/VwjWpjvy410F6u/hRmt6luKZOV/x64qZKXIfvz3QmQ/PlYi+/GxUk4hUDIIeLKPm011SCL76YxSschvudWbzmiqlkIgUN1kpch+/FEX2Y+Plch+fKyUUwiUDAJExfnpp59i7wQUu+E9R/S0cwlx0gpzV9ANunGACslTbqS7WP3Nc3j0WBEQqG6y8s2ePd2gmLKyCHBWVEnY6MmT49+MXlVtHdewYWTR1UU+pNFOkf2qmoEqVwgIgQoEOBNRp06d1BGZOnWqndlIO6nedBAHZ6JGKQmBmoJAsWRlUvyKJeOStLM6tJH+pNVOAmsUKtVq3br1okIVpnKEgBAQAkJACAgBISAEhIAQKB0ERPZLZyzUEiEgBISAEBACQkAICAEhUFAERPYLCqcKEwJCQAgIASEgBISAEBACpYOAyH7pjIVaIgSEgBAQAkJACAgBISAECoqAyH5B4VRhQkAICAEhIASEgBAQAkKgdBAQ2S+dsVBLhIAQEAJCQAgIASEgBIRAQREQ2S8onCpMCAgBISAEhIAQEAJCQAiUDgIi+6UzFmqJEBACQkAICAEhIASEgBAoKAIi+wWFU4UJASEgBISAEBACQkAICIHSQUBkv3TGQi0RAkJACAgBISAEhIAQEAIFRUBkv6BwqjAhIASEgBAQAkJACAgBIVA6CIjsl85YqCVCQAgIASEgBISAEBACQqCgCIjsFxROFSYEhIAQEAJCQAgIASEgBEoHAZH90hkLtUQICAEhIASEgBAQAkJACBQUAZH9gsKpwoSAEBACQkAICAEhIASEQOkgILJfOmOhlggBISAEhIAQEAJCQAgIgYIiILJfUDhVmBAQAkJACAgBISAEhIAQKB0ERPZLZyzUEiHw/+2dCdyN1fbHF64kmTWgGyKVkDIlQ4qMGSOKJMKlkCGhqAzlUiEhsyRRaFCi4oqMkeuWiiTDRYMhEQq5n+/q/7z/43WO8zzvmd+z9ufzfrzOu5+99v7tfdbzW2uvvbYhYAgYAoaAIWAIGAKGQFgRMLIfVjitMUPAEDAEDAFDwBAwBAwBQyB+EDCyHz9zYT0xBAwBQ8AQMAQMAUPAEDAEwoqAkf2wwmmNGQKGgCFgCBgChoAhYAgYAvGDgJH9+JkL64khEDcINB7cWDJmzBj1/uzbt0/y589vciOMQKxwLn2ytDz11FMRHp01bwhED4H3GzeWTDHQlV5HuHffPikQA93qpZ87jxyR3B06eHkkJnW/+uorKVGiRMRl33333WGTYWQ/bFBaQ+kVgdtvv10uv/zys4a3a9cu+fTTTxNmyCim3Llzy8qVK131eUODDa7qWSVDwAsCwy4cJm+++aaXR6xuAiGQjLpy/QbTleFaou+0aiWDv/kmXM1FrJ3mzZtHXI+98cYbYmQ/YlNoDRsC5yLwwQcfSIYMGWT37t0pf9y0aZO89NJLIcHVt29fueCCC2TQoEEhteM8vHz5cuFle+rUqXPaGzBggFx99dXSpk0bV7KM7LuCySp5RMDIvkfAEqx6MupKI/vhW6RG9v8fSyP74VtX1pIh4AoBXmCLFi2S0aNHn1Ufsl6tWjW588475b777pNWrVpJvXr15IknnpBatWppXZ4dPHiwEu2XX35ZsmfPLv/9739lxIgR2h5GxIsvviivvvqq1i9ZsqSMGjVKTpw4Ifny5ZMVK1bIDTfcILly5ZKFCxdqCMRdd90l3bp1kwsvvFC2bdumcnn+uuuukwMHDkijRo1k+vTpUrRoUTl06JD06dNHGjRoILfddpv87W9/kyxZsgiK5Pnnnw84fiP7rpaGVfKIgJF9j4AlWPVk1JVG9sO3SI3sG9kP32qylgwBjwjwAoMg//TTTylPjh8/XjZs2CAff/yxEvq6devK1KlT5b333pN33nlHunbtKn/++aeMHTtWqlevLi+88IKS86efflpJ//z586VMmTLq2e/SpYuSe0r58uWFtmmDz1q0aKEkf8eOHdKpUyepWLGiGgBz586VOXPmaDvPPPOMhud89NFHSvRvvfVWrfvAAw/IY489JlmzZpVvv/1W/4axULp0aTVQbrnlFiP7HteCVQ8NASP7oeEX708no640sh++VWlk38h++FaTtWQIeESAF9hvv/0mX375ZcqTkG3+f++990rPnj3lxx9/lPr16+uh1nLlyqln/9prr9UfiDX1e/fuLWvWrJGrrrpKjYcOHTqoAQDZdwpkf9y4cVK5cmW54oor1AMPKafe0qVLte0rr7xSmjVrJsWKFVPjYOLEiTJp0iRZv3691uX/27dv1/CgnDlz6m4B4T1VqlSR2rVry6WXXqoGBG0FKubZ97hIrLorBIzsu4IpYSslo640sh++5Wpk38h++FaTtWQIeEQg0NY0zTRp0kQef/xxjefnd4g43nYMgY0bN2p4DyE07777robbbN26VUNy8OgTBuSP7BPeA2kvXLiwkv0KFSpIjhw5lOzzO7H5hw8fltWrV8sdd9whM2fOPIvsv/322/Lhhx9qOBByOFx8//33p8Tsc1CXXQCH7Hfs2FH48S0Zns7gESWrbggER8DIfnCMErlGMupKyWC6Mlxr1si+kf1wraU0tUMcNl5T3zCONDVkDyUkArzACNcZM2ZMSv8J0YFI8/myZcuUdI8cOVI96ZygJz6+c+fO0r59e2nYsKGG7nz22Wcav79gwQIl25D5bNmyaciNr2f/fGSftjEA8NQXLFhQY/Xx5PPDGiVkqFevXhrnT92BAwcqySdVmHNANzXZ9zcp5tlPyKUa9502sh/3UxRSB5NRV5pnP6Qlc9bDRvaN7Kd5NXGAkjCJli1bprkNCN3w4cPVW2ol+RAg5IXQF9+yf/9+jYO//vrrpWbNmvLoo4+qZ591Nnv2bDl58qScOXNGw3rw8r/11lt6cBcDgZCgOnXqaDw+B20h93jnKYTlnI/s443nhYpRQTly5IjuDhCnj8cfucTqY1zkzZtXzw0gA3lG9pNv7cbbiI3sx9uMhLc/yagrjeyHbw0Z2U9isv/aa69pxhLinP/44w8lLv/+9781Drp///6SOXNm+e677zSlINlLIDmELZCFhNAFQiogWKtWrVLPrG9GFIgZcdF8hreT0Ai8rLSHBxZv7dGjR+Wiiy7Sg42ff/65TJkyRS/9IesJoQ++6Rh9lzxZT2bMmKEZUTho+dxzz6lHF8OBDCqFChXSGG/6BIH7/fffZejQobJ48WK/2VbC93WyliKNAASfeHpCdlh7F198sRw8eFDFsjZ/+OGHlC4QnnPs2DG/6TLP10+I+/fff6/PFShQQPbu3auyWKu//PKLX1lexm2efS9oWV23CBjZd4tUctRLD7rSyH741qqR/SQm+443/R//+IfGRkPuIfZ4MfkhjpmDiMQn33TTTUqyHnnkESXR69atUxLEQUjipvHO+2ZE4dBk2bJl5ZJLLlFyT6YUyDtpEbn4hZSIl112mXprMSwIzeCwI3XpC15TvLn+Srt27TReGy8rzxN2waHNtWvXqlFBBhUU3ZAhQzR2m1CPqlWrSqVKleSTTz45J9sKWV6sGALRQsDIfrSQTi45RvaTa76TYbRG9sM3y0b2k5zsk4aQFIbkEZ82bZp6vgmL4LAixYlL/te//qVx0sRGQ+QxBNgBIFsKZJsbT1NnRCGLCSEYHHa8+eabNUUiYRhkLcHzTiGDCjIg5RgPeGzxppI6kWf8FbystAF5px4eXLKhQPbJew6h51Km4sWLp9zECuFnN4LQDH/ZVsL3lbKWDIHzI9BxQkc1SKNdML5JaRrtYnKjg3jnCp0jfvNkdEZiUgyBvxBY37GjDI2BrvSKf6x0nJd+lqxSxW7Q/T/Aku5SLTz7XBA0a9YsJc2Qcwj7s88+m5InnBAbSDNpEFu3bq0En/AZ4qAJyYHsE1cN0fbNiEL6QzzmXH1MHnMyoBBuAxEvUqRISpw/RgK7B/zQnhOGQfgFN5P6K4T4PPjgg7q7QCgQRN8h+/STdphMYq99UzpibJA/3V+2FS9fGqtrCISCQDCyn29nvlCaD/hsNK4h9yfc5EZkOs9pNFY4R2d0JiUZETCyH75ZTwSDhNEG6+eRTJnk49y5QwImKck+scnE0kPwIeGQZbztHLz95ptvNC4fTz6HG4mr51Ii6hLnT7YSPOkc1CX9YeqMKBgBtIUXE087Hnieh3RzCREhPq+88oqG8WBI7Nu3Ty8qYgeBi5TwxvsrGCcQduphdJB5JTXZxyBo3LixtkE+djKqYCAQ6+8v20pIKyeOHyaciVAnLotyzkAQk84ZjC+++EKxYO68FNYI68K5rMrLs/FUF2OQw7dexx/qGM4XxpN/S34psLVAqCL8Ph8rMmhyIzKdRvbDDKvpysCAxkpXWhhPmBd5Omiu71VXGdn3Oo949iHgZDbJlCmTxuNz2BbyDRnn8C6EDtKMZ/7JJ5+U48ePCwdkicEn5SCkD0KPFz91RhTi6GmLtmmLXQPOAmBVYVgg9/Tp09ouWVSGDRumbfNDvD0hRv5K06ZNNeyHg7e//vqrZnMhFAnyf8899+ilR+RbxyjIkyePtkc6xn5tTMkUAAAaV0lEQVT9+gXMtuIVu0SpT1gW48ZQc/K9Y1Cxk4PRNmHCBA2HClbAm/ApdmDYUWFNEHaVyAWDlTXpe/FWNMZjZD8aKIvuKnI+KNol2eRGG99IyTNdGRjZWOlKI/uRWu2J266R/TTMHWQfj/emTZtSso84zZB5BEL+9ddf62FZCtY9IT3E/TqfQQAh04TdUFJnROGz0qVL6y4BGX+cgvIgtz5hOL6FHOaE4fA5XudSpUqdMzLOFmA8kIllz549Suxp2+mD7wOEHGGg+BJTf9lW0gBfQjzCrgw30JKVCFLPvAUj+5yhcM5UkO6NH4wpMOdgddeuXWXnzp2a9YjsNHj6SZeJMdG2bVuth7ccY4JsTBz8Jo0mO0DOGmAO3GRvypo1qx4Qx+hkN4IdCtYIc06/CDMLlFWKv2FwUkipyeFv3/5wMB2jktA0SD9GJN49jFF2n/xlpeJAOoe9OVdCqNq8efP0cLq/5wMtECP70fnqJBvpjtV4ozObkZdiujL+dKWR/civ+0STYGQ/DTPmkH2UXDwWjAQMjtQFL70/Yh+PY4hln0h5SrpRwqkgxpBm5vp8ZJ+dkEWLFuklVtmzZ9fQJy4+gxRj2OEFZ3dm165dSnI5h8E5CM57cP4CGRB0iDQ7LewqcEEVJJvzIU5hR8FN9iaeIfSLXR4IP/nzGQvnQAg/q1ixYsodDb5ZpdiJ4BkMEwwcjBUuxSKMy+nP5s2bNe8+efKXLFmil3RxSy+7Uexk+ctKxUF2QsMYV+XKlfWgeKDnjez/hUCsSKjJjaX2SSzZpivjU1ca2U+s71E0emtkPw0o4yklDMbX456GZuyROEUAUo+HnnMRpDFlhwMCdD6yj3cej7ZzXgIPPeSaHR3IOWSfMB4OT2NIQMbZOeAzMi9Rl4JHHbJNRiV2hwjx8i1uszd1795dxo0bp8SaexvwukPE6QsZodixwGhNnVWKOvyNfnBmgx/GBel3+uNcssV5Dw6Ogw+GDgYBXnt/WanIFoURCo58f6hP5qrUz7PTYWTfyH40VUOsjJtojjFSskxXxqeuNLIfqRWfuO0a2U/cubOeRwgByDAXQ3G2gdAa7jWoUaOGetwDxezjwWc3hcxKFC4rg9QTduVL9p2YfS5C4xI0CD8pTTnz4ZT3339fHnroISXChP84hVAZt9mbIPvBbr31l1WK+FvOk5CNaePGjXovg0P2nf743qjL+NgZYKeAHQy8+1zEhtFAcbJSYSg4t+US1saOAEZF6ucxTthRwHhyzko448/wdIaAM24HdMP3ZYgV+U02ueGbsdi1ZLoyPnWlZAisK2O3WkxyLBEwsh9L9E123CFAXPvkyZPVI+7s3BCWwoFFzmMEIvt4qQnFgRjnzZtXsx1B4nv06CHZsmXT0BnfA7oO2eeiNUKEyKKEgQHRJsSHGPfUZB+w3GZvol03ZD91Vilk3n333XpZG8YNITrsVtBearJPJiiwIhwHgs69EBgHePhTZ6XCYEpN9pGT+nna3LJli991YTH70fm6JBvpjtV4ozObkZNiujJ+daV59iO37hO1ZSP7iTpz1u+IIECqU1KbEuPuFA6dlihRQrggjRcc2Y/8ZeMhixG3JbMbsG3bNs1wBPnt1q2bEm/+9fXsE7LCNjhx+TfeeKMecuW+BDI6YVwsW7ZMSbZvIdbdTfYmQmvckP3UWaXoNwbFyZMnNesTfcLLDyZOf3w9+9TlcDm7IM4uhb+sVOxU+PPs+3s+0MQa2Y/Ikj+n0ViR32STG53ZjJwU05XxqyuN7Edu3Sdqy0b2E3XmrN9xiQDZZkiZ6uTmp5MQag5G47kPVEiDymG3QF7t1M+5zd50PpACZZWC4GO0EGdPaA7Zmw4ePBiwKWLxyRDlXOxGRX9ZqQI14O95f3WN7EdnyScb6Y7VeKMzm/ErxXTlX3MTCV1pZD9+132semZkP1bIm1xDIMYIxHtWqdTwGNmPzoKJFflNNrnRmU2TEg4EEk1XGtkPx6ynrzaM7Kev+bTRGAKuEUi0rFIdJ3TUW6UDlXw787keu5eKyUZCbbxeVofVTQYEEk1Xru/YUYaeR1fGy5yVLVtW7x+K55IIfQS/YP08kimT3aAbzwvN+mYIGAJ/IcCZCC6ii3bhxmvObES7mNzoIA7OZI2yYgikFwRipSu94hcrHeeln4nQR8YTrX6SWCNcJUPZsmXPhKsxa8cQMATSBwIcisZ7YcUQCCcCeBbJlmXFEEgvCJiuTC8zGV/jmDhxovATrmJkP1xIWjuGQDpCYP369ZqbP9rF5EYH8WTDOTqompRkRCBW3yWvWCdCPxOhj+CeKP30XSNG9r1+Y6y+IZAECMRKmZnc6CyuZMM5OqialGREIFbfJa9YJ0I/E6GPRva9rjyrbwgYAnGLQKyUrsmNzpJINpyjg6pJSUYEYvVd8op1IvQzEfpoZN/ryrP6hoAhELcIxErpmtzoLIlkwzk6qJqUZEQgVt8lr1gnQj8ToY9G9r2uPKtvCBgCcYtAx44dw3o4yO1ATa5bpEKrl2w4h4aWPW0IBEYgVt8lr3OSCP1MhD6Ce6L003eNWMy+12+M1TcEDAFDwBAwBAwBQ8AQMAQSBAEj+wkyUdZNQ8AQMAQMAUPAEDAEDAFDwCsCRva9Imb1DYEkQiBjxoySM2dOOXToUMBRc+Pl/v37PaPipu0CBQrITz/9JKdOnfLcfqAH3Mjl2YIFC8ovv/wiv/32W1hku5GbO3duyZQpU5rwDGW8zOHBgwflzz//9DzWPHnyKE6BnmVMhw8fTlPbnjtjDxgCMULAzfc7rbrS65CyZcsmZ86ckWPHjnl9NKr1L7jgAsmaNavqB38lXsYRTMdFFbQ0CjOyn0bg7DFDIL0jMHDgQKlTp46+MHiRtWvXTnbs2JEybP5GnaNHj8rFF18sQ4YMkYULF7qCJVjbJUuWlHHjxsmJEyeUJG7fvl26dOniqu3zVQom13m2dOnSMmXKFHn22Wdl/vz5EZcLfq+//rpkyZJFZe3cuVM6dOgQcbkYU4wTnHmxLl++XOfRTbnqqqukSpUq0rVrV2nRooXOkW+58sorZfr06Tp/F110ka4Nt227kW91DIF4QSCYXglFV3oZI+R5xowZwveasm/fPrnnnnvOMrTRq+hX9Dblueeek6VLl3oRE3JdbmevXr26tG/fXo2Se++996w23Ywj5E64aCCYjkNnLlu2TH7++ecUvBlTPBYj+/E4K9YnQyDGCKBsV61aJfXq1VPPOkT0wIED8vDDD6f0jBcELxYIXbdu3aRx48Zy++23B+25m7Z5IWXOnFkJL0QYhcoLYevWrUHbD1TBjVyehZi+9dZbkj17dn0Rhkr23cgdMGCAlCpVSrgeHcPqoYcekvHjx4e0o+FG7hNPPCG33HKLznOTJk2kT58+UqlSJVcYM+cVK1aUa665RvudmuyPGTNGDQiMxKJFi8qcOXOkRo0aAb14roRaJUMgzhBw8z1Lq670OtSmTZtKjx49pGrVqvro6tWrVYfNmzcvpakFCxbI888/r/r9jz/+8CoiLPXxlKMfrrjiCtmzZ885ZN/NOMLSkSCNBNNx6E50KO++WGHpFgcj+26RsnqGQBIhUKZMGXn55Zfl5ptv1lEPHz5cihUrJihhp6xdu1aJ3ObNm6VWrVrqta1QoUJQlNy0nSNHDvVG4X3Co9+mTRslpWkJM3E65EYudV977TX55JNP1Fs9duzYkMm+G7mTJ0+WwoULC+PmpTFt2jT1uIdS3Mi9+uqrdbwYdJdccomsXLlSevbs6Uks6fL8kX2MJMjGiBEj1IBZt26dGm8bN2701L5VNgTiGQE337O06kqv4+7Xr5/cdNNN0rx5c3108eLFqsueeeaZlKYg+XjWM2TIoM6TTp06pXj5vcoLtT47p4UKFTqH7LsZR6iyvTwfSMeRlQedBpbHjx9XB82sWbO8NB21ukb2owa1CTIEEgcBPPRDhw5N8fL2799fypUrdxbZ/+yzz1RJf/vtt3LjjTeqcYCnN1hx0zZt4GEfOXKkvrz4N1Ql6kYunpyaNWtKw4YNZcmSJWEh+27kspOQP39+9cpVrlxZX9Z42EMxbtzIbdu2rRpTX3zxhRQpUkR3EjDcvJRAL0LCdiAbo0eP1uYcsr9p0yYvzVtdQyCuEXDzPUurrvQ6cEg9oSctW7bURzG4+d4NGzYspanZs2crKd27d69MnTpVvf6jRo3yKios9QORfTfjCEsHXDYSSMe1bt1a34uEcXXv3l13SNHfoehtl13yXM3IvmfI7AFDIP0jgIeZrWc89Sgutn0pvXr1Shk8HiOU9aJFi1TJPfLII66Iopu2Cd3hRXX69GkNEcKgCLW4kUvMOtvyJ0+e1INj/Dt48GDXZxH89dGNXLzrePQfeOCBFGJMKA8kIa3FjVyMjN27dyvGHB5kLv156c/Xh0AvQow/dmZ69+6tnkS8/BiD8fgiTCvG9pwh4OZ7llZd6RVd4vPRIY7B/uGHHyqRd85SscOGXnOSDqDXOVvj7AR4lRdq/UBkP9g4QpXr9flAOg6HFHobJwk6bs2aNboL/dVXX3kVEfH6RvYjDrEJMAQSEwG2e3kZrFixQubOnathJe+8846G0/DymDlzpmZhgeQTu882ptvDScHaHjRokIYQefUyB0M6mNzrr79e48wphJ+89957erlYoGwRweQ5fw8ml52T2rVrS926dTUGfsKECYpzqFmIgsllx4SsQ3gCOTMwadIkPXTrJf409YuwQYMGetCXXR9IBJ5P1gghYNWqVXMLmdUzBBIGgWDfs1B0pRcQ/v73v6uTpFGjRnL55ZerHqlfv75mVMubN6/s2rVLdTjhJ1u2bJG3335bd98cZ44XWeGom5rso/N+/PFH1T/+xkG4YSyKr47LlStXyjsQfQm2OEg48/TYY4+lhL7Gop/nk2lkP95mxPpjCMQJAp07d9aYfAoKmNAWMij885//lPLly2t4Dd5bCh74Zs2a6WErNyVY23iceXH5FsJN2JIOpQST69s2XjHGF+oBXdoMJhcD45VXXtEx4/lma52DdaGWYHLZ8kcuuxkUXvxsSXspvAiZeydTE7sRkHtCgyAWjI2YVgw4DgdaMQTSGwLBvmeh6EqvWJEwAacF37mPP/5Y+vbtqzH7GPMY4i+++GLK2Sr0NTH7aUmd7LVf/upD9tlZaNWqlf4ZJxL6BB3kbxzhkJmWNnx1HM4L5x3ImSf0JwW9jdML51A8FiP78Tgr1idDIE4QwIvBwc1AYTSQxOLFi+u2pdfwjGBtRwqCeJYbiXsF3IyXOcTrRwrOcBeIx3fffReRtsPdV2vPEEgrAsG+Z6HoSq99gkDzXQ7kCaevhJ3EiuS7HU+wcbhtJ9L1yDaGjovnYmQ/nmfH+mYIGAKGgCFgCBgChoAhYAiEgICR/RDAs0cNAUPAEDAEDAFDwBAwBAyBeEbAyH48z471zRAwBAwBQ8AQMAQMAUPAEAgBASP7IYBnjxoChoAhYAgYAoaAIWAIGALxjICR/XieHeubIWAIGAKGgCFgCBgChoAhEAICRvZDAM8eNQQMgdAQICMEF2gdO3bMU273tEolIwapIA8dOpTWJuw5Ec3kEc15M9ANgWRHINrfOdOV4Vlx0Z63QL02sh+e+bRWDIG4RYC85+RcdsrBgweFG1ud/MDR7ji5lbmMi1zFXO7CD30ZM2ZMRLsyZMgQqVOnjhw5ckRuu+22iMpy2zgX33Bx19atW/USqkQp0Zy3RMHE+pn4CJiu/GsOTVeGby3Hi640sh++ObWWDIG4RIAXGIVc+Llz5xZyuVPq1asXMA9zJAeydu1ava6dy0kqV64sDz74oJL9ZcuWRVKscGU915tzqcyrr74aUVluG3fIPvcYcEV8opRozluiYGL9THwETFf+NYemK8O3luNFVxrZD9+cWkuGQFwiwAvs5MmTesU3xbmdtn///tKjRw8l3mzZEkrTsmVLGTx4sNxxxx0aqsFn3L64aNEi4UbZ48ePS8aMGSVfvnwCaX/66ac1JOauu+7SW1OzZs2qsrg5ddiwYXLfffep537Tpk164+73338v11xzjZw5c0Y2btyoNzx27dpVxo8fL7NmzQooe8SIEXoNOR5wLmn69ddfVfbKlSvPwrxEiRIyatQoNWqQsXnzZr29ls/KlSundbn8pEWLFinPpe7jww8/LMWKFZP7779fLxT7+eeftQ0uneJadP5+4YUX6q3C9Gv58uUSSO6ll16quyj0m9tqwXHFihUydOhQDSeizcKFC+ulZanJvldZjAnDCYzp/6lTpxRT2qW/S5culX79+kn79u2lbdu22qfrrrtODh8+rPi/++67Z43v6NGjeoPwG2+8oQZSmTJl9Fbc0qVL6y6MM2+zZ8+WcePGyQ033KA7SNu3b5fWrVvrJWuB1gVj43nqcgul01fkYYSCMdgzzxMmTNA+WDEEIo2A6UrTlelVVxrZj7T2sPYNgRgjwAvs9OnT8tRTT6lXH/INkYdwQUQzZ86sxBgyCEmH2EHqIcply5ZVIlapUiUl95kyZVIDgIKXnPqE5axatUo/27Bhg5I+jAdIdO3atZX4UXhu/vz5ejU6bY4dO1bbcMJ4eDaQ7JkzZ8q1116rcf0QZMgspLZ58+ZnobtgwQLJnz+/fPnll/pv3rx5lfD+8MMPaowwzilTppx1pTmf+/axZ8+eSnLpL+1UqFBBCTEG0OrVqxUDiDIklc9r1qwpgeQSooN8CmNmfBgR9Itx4NkHq9RkH4PKqyyewVjD2MJAcXZwduzYoVfS8/eGDRtKmzZtpFmzZtqnPXv2SMGCBZWYYwx++umn+jlGGOMCLzxTjAHMKXv37tXQI2fefv/9d/2ddYYM1gwGAAZWoHVRvXp1NTpo/+uvv1ajgzVapUoVNYYoc+fOVWMBA+LWW2+NypmOGH9VTXyMETBdaboyvepKI/sxVi4m3hCINAKp41AhWBDJbt266b8Q//r162tIDx7e8uXLC4QXj/VLL72kHnV2AfD4Q8ggapA6iCGx95MnT5bevXvLBx98IAMGDFCChgcZ4wFiCJGG9CGP4hvG4xvPiHc8kGwIKmS/Xbt28p///EfboC/ObgXt5smTR3cf8Lgznhw5cmgfDhw4oEYHfeCZqlWrngW5Q/adPjr/p519+/ZJqVKllOAPHz5c+vTpox7wQYMGqQcarzwGDOTXn1y81xBlzknceeed6s3/6KOPZPfu3dKkSRM1iLp3734O2adtZHiRRbuQ/YkTJ+qcrFu3To2RGjVqyPTp06VkyZLSoUMHxQKyP2PGDDU+XnjhBalWrZrOL+OsVauW5MqVK4XcM9+0B9lnR4KdId95A0wMGAwG8KIf1G/cuHHAdYGhBtl/88039ewGxB4sGzVqpL9jgLLj9M0336hRyJxbMQQijYDpStOV6VVXGtmPtPaw9g2BGCPgeKuefPJJJVBr1qxRLzMFss/vDgEmlh0vK0SM0BuIGGQRoturV68Ugg3Zh3Bz2JUQC8JHIGmE7kAWCf2BbOIBh+wT5jF16tTzkn086IFk45GG7DvnDOg3Rosv2Ycs0gc85k2bNlWPOWE+hKNwIDcY2Xf66Bg4EFcMIKew08EhWjAaPXq0hhNddtllguccDPzJ7dSpk5J9yCqGCoSbl4lzIJfQJsh5as9+ly5dtL4XWe+//76SfUJsOAOxfv162b9/vx5KBnvCb5gn/g/Z5zPGzFwxZ4wbuZTPP/9cQ5PYefEl+zxLm75knzAbjD3qYXBR8M5v2bIl4LogHIg15mDOTgDGBGSfECyMTdaCs+vErgqGpRVDIJIImK40XZledaWR/UhqDmvbEIgDBFLHofp2CdJM2AeeXUqDBg0EowCiu3jxYg25wdvrhFfwO95zQmTweOPFJYxnyZIlQjgHxBbCRpgKHnLi5CH7DgF1PPt42CGWEHwnHATSHEg2CjgY2adtwk9y5swp8+bNk+LFiyvBXbhwoQwcODAo2Xf6iJcajzwYQLYhyHitGQcx94QSMU688s7uQiC5hB9RF8MJcusQ4SxZsqiHHFwxGFKTfc5EeJVFfLsXsu+EVRHrz+4Oxhxefs40QOAx9Aih4SA1BglkHNLNGQ1fsg8pZx6nTZumOzmPP/64YsfaCbQuWDv+yD5YT5o0SQ0K1tWjjz6qa43dj9TnM+Lgq2VdSGcImK40XemP7KcHXWlkP50pKxuOIZAaAS8vMDz2c+bMkSJFiqQ0A2El/hpPvpPCk3qQXggwhywJ34HkO4XMP4TeOCExvmSf8BG8xhA6QmAgjni78fIGku3E7J/Ps49s/s7ZBPpHcQgwHno8++xiOIaN01d/fSS8hfAl2mEHYeTIkUra6SPElsLn7GRgWASSSzhMarKPx97xoNM/wo38pd70Kgvjg8O4DtbMOyFMeOM5p8BZCgh23bp11bN/4sQJPbhLwYAjVIt/8c4zNs5t4Nnnc/rsj+wzbxh8tO+0RbsQfjJ6BFoXHHj2Jfuvv/66noFgDTEfxOg72LOjwoFiK4ZApBEwXdlSdzNNV6Y/XWlkP9Law9o3BBIQAbLIEBbDAVXnQC5kn5AYYr6LFi2q3mg83k7hkiXCVLZt26ZE/nyFuniT/YVm+JPtBULaxXNMvDqGSFoL7TAeDBcMG6fQv0KFCqm33vdzL3LZfeBgLG2fr4RDVur2+/btq2SfGH1CfZhf30vG2BFhl8V3bG4wZBeF+SScybd4WRfOcxB9B3sn5MxNH6yOIRBtBExX/nXJnulK9ysvFrrSyL77+bGahkBSI+CQfWL4rSQuAr5kn2xKVgwBQyC8CJiuDC+esWotPelKI/uxWkUm1xBIMARI1UnYCbHkVhIXAe45IESJMxmkJLViCBgC4UXAdGV48YxVa+lJVxrZj9UqMrmGgCFgCBgChoAhYAgYAoZAhBEwsh9hgK15Q8AQMAQMAUPAEDAEDAFDIFYIGNmPFfIm1xAwBAwBQ8AQMAQMAUPAEIgwAv8DdzKwcdeXjCoAAAAASUVORK5CYII=", - "text/plain": [ - "\n", - "\n", - "If you see this message, it means the renderer has not been properly enabled\n", - "for the frontend that you are using. For more information, see\n", - "https://altair-viz.github.io/user_guide/troubleshooting.html\n" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.m_u_parameters_chart()" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "87cf8e90-8109-4f9a-9a2e-bab8c4ff2118", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.vegalite.v4+json": { - "$schema": "https://vega.github.io/schema/vega-lite/v4.8.1.json", - "config": { - "view": { - "continuousHeight": 300, - "continuousWidth": 400 - } - }, - "data": { - "values": [ - { - "cum_prop": 1.809438003874675e-7, - "match_probability": 0, - "match_weight": -22.34, - "prop": 1.809438003874675e-7 - }, - { - "cum_prop": 0.000005609257470950979, - "match_probability": 0.02506, - "match_weight": -5.28, - "prop": 0.0000054283136705635116 - }, - { - "cum_prop": 0.000009409077222244377, - "match_probability": 0.03073, - "match_weight": -4.98, - "prop": 0.0000037998197512933984 - }, - { - "cum_prop": 0.000009590021022631845, - "match_probability": 0.03285, - "match_weight": -4.88, - "prop": 1.809438003874675e-7 - }, - { - "cum_prop": 0.000016827773038130545, - "match_probability": 0.03665, - "match_weight": -4.72, - "prop": 0.0000072377520154987 - }, - { - "cum_prop": 0.000024065525053629244, - "match_probability": 0.03812, - "match_weight": -4.66, - "prop": 0.0000072377520154987 - }, - { - "cum_prop": 0.000025513075456728984, - "match_probability": 0.04333, - "match_weight": -4.46, - "prop": 0.00000144755040309974 - }, - { - "cum_prop": 0.000029855726666028204, - "match_probability": 0.0454, - "match_weight": -4.39, - "prop": 0.00000434265120929922 - }, - { - "cum_prop": 0.00003293177121577173, - "match_probability": 0.04767, - "match_weight": -4.32, - "prop": 0.0000030760445497435285 - }, - { - "cum_prop": 0.00003636970325260336, - "match_probability": 0.05019, - "match_weight": -4.24, - "prop": 0.0000034379320368316257 - }, - { - "cum_prop": 0.000043426511297184334, - "match_probability": 0.05299, - "match_weight": -4.16, - "prop": 0.0000070568080445809755 - }, - { - "cum_prop": 0.00004595972438892204, - "match_probability": 0.05611, - "match_weight": -4.07, - "prop": 0.000002533213091737707 - }, - { - "cum_prop": 0.000050121431627303537, - "match_probability": 0.05963, - "match_weight": -3.98, - "prop": 0.000004161707238381496 - }, - { - "cum_prop": 0.0000589876780168197, - "match_probability": 0.06362, - "match_weight": -3.88, - "prop": 0.000008866246389516164 - }, - { - "cum_prop": 0.00006785392440633586, - "match_probability": 0.06818, - "match_weight": -3.77, - "prop": 0.000008866246389516164 - }, - { - "cum_prop": 0.00007889149571838061, - "match_probability": 0.07344, - "match_weight": -3.66, - "prop": 0.000011037571312044747 - }, - { - "cum_prop": 0.00009590021261374204, - "match_probability": 0.07959, - "match_weight": -3.53, - "prop": 0.00001700871689536143 - }, - { - "cum_prop": 0.00011815629920874926, - "match_probability": 0.08685, - "match_weight": -3.39, - "prop": 0.00002225608659500722 - }, - { - "cum_prop": 0.00015796393279288168, - "match_probability": 0.09558, - "match_weight": -3.24, - "prop": 0.00003980763358413242 - }, - { - "cum_prop": 0.00018999098472249898, - "match_probability": 0.10626, - "match_weight": -3.07, - "prop": 0.0000320270519296173 - }, - { - "cum_prop": 0.00025006432485952246, - "match_probability": 0.11962, - "match_weight": -2.88, - "prop": 0.00006007334013702348 - }, - { - "cum_prop": 0.00034071716879680025, - "match_probability": 0.13683, - "match_weight": -2.66, - "prop": 0.0000906528439372778 - }, - { - "cum_prop": 0.0003408981125971877, - "match_probability": 0.14604, - "match_weight": -2.55, - "prop": 1.809438003874675e-7 - }, - { - "cum_prop": 0.000486919757065607, - "match_probability": 0.15983, - "match_weight": -2.39, - "prop": 0.00014602164446841925 - }, - { - "cum_prop": 0.0007744394376345554, - "match_probability": 0.19211, - "match_weight": -2.07, - "prop": 0.0002875196805689484 - }, - { - "cum_prop": 0.0015637162857160547, - "match_probability": 0.24073, - "match_weight": -1.66, - "prop": 0.0007892768480814993 - }, - { - "cum_prop": 0.003648731696273444, - "match_probability": 0.3223, - "match_weight": -1.07, - "prop": 0.0020850154105573893 - }, - { - "cum_prop": 0.003649998302819313, - "match_probability": 0.33909, - "match_weight": -0.96, - "prop": 0.0000012666065458688536 - }, - { - "cum_prop": 0.015437400746407093, - "match_probability": 0.48748, - "match_weight": -0.07, - "prop": 0.01178740244358778 - }, - { - "cum_prop": 0.015449885868576985, - "match_probability": 0.50644, - "match_weight": 0.04, - "prop": 0.000012485122169891838 - }, - { - "cum_prop": 0.015450066812377372, - "match_probability": 0.99966, - "match_weight": 11.53, - "prop": 1.809438003874675e-7 - }, - { - "cum_prop": 0.01545024775617776, - "match_probability": 0.99972, - "match_weight": 11.79, - "prop": 1.809438003874675e-7 - }, - { - "cum_prop": 0.015450428699978147, - "match_probability": 0.99977, - "match_weight": 12.11, - "prop": 1.809438003874675e-7 - }, - { - "cum_prop": 0.015450790587578922, - "match_probability": 0.99983, - "match_weight": 12.53, - "prop": 3.61887600774935e-7 - }, - { - "cum_prop": 0.01545097153137931, - "match_probability": 0.99986, - "match_weight": 12.79, - "prop": 1.809438003874675e-7 - }, - { - "cum_prop": 0.01545169530658086, - "match_probability": 0.99989, - "match_weight": 13.11, - "prop": 7.2377520154987e-7 - }, - { - "cum_prop": 0.015452961913126728, - "match_probability": 0.99992, - "match_weight": 13.53, - "prop": 0.0000012666065458688536 - }, - { - "cum_prop": 0.01545712362036511, - "match_probability": 0.99994, - "match_weight": 14.11, - "prop": 0.000004161707238381496 - }, - { - "cum_prop": 0.015457666451766272, - "match_probability": 0.99995, - "match_weight": 14.23, - "prop": 5.428314011624025e-7 - }, - { - "cum_prop": 0.015478474988412927, - "match_probability": 0.99997, - "match_weight": 15.26, - "prop": 0.00002080853664665483 - }, - { - "cum_prop": 0.015486255571886431, - "match_probability": 0.99998, - "match_weight": 16.02, - "prop": 0.000007780583473504521 - }, - { - "cum_prop": 0.015716235131549183, - "match_probability": 0.99999, - "match_weight": 17.6, - "prop": 0.00022997955966275185 - } - ] - }, - "height": 400, - "layer": [ - { - "encoding": { - "x": { - "axis": { - "format": "+", - "title": "Threshold match weight" - }, - "field": "match_weight", - "type": "quantitative" - }, - "y": { - "axis": { - "format": "%", - "title": "Percentage of unlinkable records" - }, - "field": "cum_prop", - "type": "quantitative" - } - }, - "mark": "line" - }, - { - "encoding": { - "opacity": { - "value": 0 - }, - "tooltip": [ - { - "field": "match_weight", - "format": "+.5", - "title": "Match weight", - "type": "quantitative" - }, - { - "field": "match_probability", - "format": ".5", - "title": "Match probability", - "type": "quantitative" - }, - { - "field": "cum_prop", - "format": ".3%", - "title": "Proportion of unlinkable records", - "type": "quantitative" - } - ], - "x": { - "field": "match_weight", - "type": "quantitative" - }, - "y": { - "field": "cum_prop", - "type": "quantitative" - } - }, - "mark": "point", - "selection": { - "selector112": { - "empty": "none", - "fields": [ - "match_weight", - "cum_prop" - ], - "nearest": true, - "on": "mouseover", - "type": "single" - } - } - }, - { - "encoding": { - "opacity": { - "condition": { - "selection": "selector112", - "value": 1 - }, - "value": 0 - }, - "x": { - "axis": { - "title": "Threshold match weight" - }, - "field": "match_weight", - "type": "quantitative" - }, - "y": { - "axis": { - "format": "%", - "title": "Percentage of unlinkable records" - }, - "field": "cum_prop", - "type": "quantitative" - } - }, - "mark": "point" - }, - { - "encoding": { - "x": { - "field": "match_weight", - "type": "quantitative" - } - }, - "mark": { - "color": "gray", - "type": "rule" - }, - "transform": [ - { - "filter": { - "selection": "selector112" - } - } - ] - }, - { - "encoding": { - "y": { - "field": "cum_prop", - "type": "quantitative" - } - }, - "mark": { - "color": "gray", - "type": "rule" - }, - "transform": [ - { - "filter": { - "selection": "selector112" - } - } - ] - } - ], - "title": { - "subtitle": "Records with insufficient information to exceed a given match threshold", - "text": "Unlinkable records" - }, - "width": 400 - }, - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcgAAAHeCAYAAAAfLptsAAAAAXNSR0IArs4c6QAAIABJREFUeF7sfQeUVEX2/p2cIzPDkJMgyJAREMlJdAXFZTGjgLKgyKIoq+6uP3dVXBYVFxFQxIBgWAQliiBRJAoKCBKUpKQBJsDk+D9f+X/YNB1e6Op+Nd46h4PSFe776t766la4FdSuXbtK4sQIMAKMACPACDAClyEQxATJGsEIMAKMACPACFyJABMkawUjwAgwAowAI+ACASZIVgtGgBFgBBgBRoAJknWAEWAEGAFGgBHQhwB7kPpw4lxVGIHg4GCqqKi47Avxb0jO/+4OBsc6EhISKCIignJycqikpCRgyBn9hoAJyg0zAjZFgAnSph3DYvkHgffff5+aNWtG3333HT3wwAOi0cTERPryyy/Ff99yyy104sQJj8I899xzdOONN9LWrVvp4Ycfps8//5xSU1Pp5Zdfpg8//NBj2QEDBtD//d//0c8//0yDBg0y9NEbNmyg6OhoGjlyJO3cufOKssuWLaPq1avTv//9b/rkk08M1W23zK+//jp17NhR9MuTTz5pN/FYniqKABNkFe1Y/ix9CGgEuWvXLhoxYsQVBAnSAnl5Ss8//zz179+ftm3bRg899BBNnDiRrr76akGQmzZt8lgW9f/tb3+jU6dOEcjSSNIIctSoUfTNN9+4Jcj//Oc/9L///c9I1bbLO336dOrQoQOtWbOGJkyYYDv5WKCqiQATZNXsV/4qnQjoIcjbbruNbr31VgKJNmnShJKTk4VXCc8RnqczQc6ePZuuuuoqeuGFF8Qy66RJk+jYsWMUGRlJ9evXF//23nvvCe/SmSDhTXbv3p3y8vLozjvvpK5du9Ljjz9O8fHxVFxcTMePH6d//OMfdPjwYdIIcvv27ZSRkUGhoaH0ww8/0JgxY6igoIA0D1IjyLvuuosefPBBiomJoaKiIlq6dCnhN+f017/+lW666SZav349tW/fni5cuEB33HEHeSoPmUH08L4h++7du+nvf/+7+O+aNWvSf//7X6pbty5h2ReywSPUSBvtXLx4kfbs2SO+fezYsSLPSy+9RNWqVRN15Ofni3o0guzRowf9+c9/poYNG4plbOAB7x15OTECvkKACdJXSHI9SiKghyAfeeQR6tWrl/i+srIyCgkJoaCgIDp06JAgMWeCdFxiPX/+vPAokSorK6m8vFwQWWlpKV133XWXEeTq1avpnnvuEfnGjRsnlmzhgYJUzp07J4gtKirq0nKsRpCou7CwUPyGtHLlSnr66acvI8isrCyx1IqE/05KShLfAJKGp+uYpk6dSp07d770TyBleHDuyi9YsIDmz58v6gORYdkX//3999/T8OHDae3atUJ27OeCmPE7EjxBEJ6z9wu8IVN4eLjAAuWAORLyg3hBqmFhYWJCUK9ePVEn2nniiSeU1EMW2p4IMEHas19YKj8hYIQgQSzwJOGtgTAwcGPZTw9BIu/gwYPp9OnTtHHjRkF6WI6tVauW8LxAnBoJaMRRp04dMeCDHFetWiX2SFu1aiW8q27dul3yIBctWiS82aFDhwrvCx4fCN3Rg4QXDK8WpPXqq68KWbAsrNXliiDhmU2ePJk+/fRT+uijj9yW//rrr6lv376XlolBrs8++yxhcgB8//Wvfwmi69mzp/Dw5syZQ9dcc43wqv/4xz9eIkgQ5TPPPCO85qeeekrgO3DgQEHoGmYgyHfeeUfUi/TVV18Jzx57rcAWnjknRsBXCDBB+gpJrkdJBN5++21q2bKl8ETuvfde8Q0gLZAOEpYascQJwoE3BwJKS0uj5cuXi0H/2muv1UWQubm51Lt370uDOry9xx57TCwhgiAdE7w/eIEgUZBJ06ZNL/vdmSCxpLply5bLDheBpEBsIA4so2I5EqdrnZNG8q4IEoSF/U0keLfuyh88eFDI6MqDA3ZYngVZ3nDDDaIuHCrCH3ibWFLVPEgsm4JAcQgHBO64Lztv3jyxr6stsWr/r8mNCQaWtt98800l9ZCFticCTJD27BeWyk8IwEO7/fbbxR4YPBwkDOgY2DHo4uQkCAYEiSVNkJoZgoQX1K9fP7cECaLCYSAsF2K5FLLAY8QfeHLwKrHfpu3POXqQ8Jpee+014RHCm9UI1NGDxFIwPFLsmWIJFddQsCcIklqxYsVlaGtLrI6EB7J1V75t27YEDxXe6f333y+8Q+xjnjlzRpAfZNewRENa/RoBagSJ/U4k7JOC0PHd2lLvF198ISYTIMg33nhDLE9jAoE9XeRJSUkRHif+G8vgnBgBXyDABOkLFLkOZRHA4K55HSAxLGdiKRKDL/beMPD7gyBBFth/hOeIpVZ4qFiCHD16tCA8LHGCuLHX5uxBwpPFfmijRo1EWSw7Pvroo5ctsdaoUUN4yCAR7N/hu+ER7t+/X7TryoN0JMi//OUvbstjORQEioR9UxBkXFyc+G+QI7xP7LvicBJIE54g0owZM4TX50yQOIwDDx77mOgT7NfCE0YCQb777rvCs8a34NoHCBGePv7u0qULE6Sy1mg/wZkg7dcnLJGfEcBBEiwlahfr0XxmZqY4tYlB3Zkg4a3A69KWWN3dg8QpTAzwOKTjzoPEYRmcSj158qTYb4OHiL1E1A1CfOuttwTZIMHDhBeH37AXigM2IEws32rLn/CE7777blGf8z1I7S6hBi8mA/D4sHfnjSDxu6fyOFSD5VItnT17VuCXnZ1NWDp98cUXxaEaLWneOP7fmSDxb8AAh3VAkkjAD6eHQbbwTp1lASY4KTt37lw/aw83V5URYIKsyr3L32YIAXg+uOC/Y8cOW10XgFwgM5C2u4RlX5AkPElPCYSMPVdci/B2v9NVPZ7K4zccIkLd8L4dEyYfCMiAZVIENdBzHQMnXyErroxgKdg54ZuBDU7G4oqIqzyGFIAzMwJOCDBBskowAowAI8AIMAIuEGCCZLVgBBgBRoARYASYIFkHGAFGgBFgBBgBfQiwB6kPJ87FCDACjAAj8DtDgAnSJh2OsFr+eBoJ7eA4vN5nnGTAgwMbOGChnZ7EqVBEf/HH9/vie1ST1xffbPc6oFO4SqKKDgUKT2CEZOSuqGxsfT0m+bK+3yVBQkkQecQx4Zg4jqTjCPm3337rV/3F8Xgck9cuSvuicRyjR3gyXEFApBVEgcHpQfw7YoXi2L2n5FjeF/JodeDFCkSOwanMm2++WTzDhMve48ePF2HNcDXC3csTuMeHo/8I72Ym4VQk2sFFdVzfcEx6v9dRXtwn9FfCaU4EK0CYOKs4OMvsqB/++h5ft4MACbAfBEvwZcLJYFwtwX1UBIW3c8I1IVylQWxcd2nWrFniJDHu93pKzvoGHUHQDBlJ75jk2Dau+eDKE77ZOeF6E041++JZtN81QSKe45IlSwS+uEgNksIlbC0wtQxlcFWnDIJE1JYjR46IY/8gRwzsr7zyim6CdCzvSxwQwxQvYqB+zPQQYxOvXuCiOd5exOVy53t5Wvu4QI7QbloYOKNyaSHkcE/wwIEDlxXX8714jcNRXqPtW8mPe5q4s9ipUyfxqoUVHJzlcNQPKzIGsqwsgoT3hLB3iOSD+6Z2ThjPYFOIDGWVIB31DRMyuxEkxhEQpKsn4pggLWqp5kFiNu54sXjmzJnUpk0bMVPCBWx0AmJMYjkCgzI8HCRcfEYAZsTTRGSQf/7zn2J2ef3114tQX7GxseJuFurHbA6DLyKbYFCG94NwWIhM8qc//Ulc+v7pp59EAGzMgPv06SPyYgkSd8UQl3Pz5s2XfTGMFfJAPkQqgSeGSCLIDyJct26dCPiMKCMYUHE3DaG+MOtC6DJEbUH98Kh+/PFHcSnbecnl448/FuVxGR0eJ/4bsTQxYOCSPIJRI8Ebw2CNf8f9QRgSLrHDwLTQaghIjXbx4gP+HZe/EfIMCo7lSmCF1ytAlMAYbQEfXJRHZBjgg/BjCK+Gi/TwEtz1D7B3Jy8iw+DSPZbhcIn96NGjl3DV8734bkd5Ea5NT38jUADkxoSldevWYqUCMWCBFUgXF+URuBwJf2OAg25hooAVDbQD3YTe4gI+wsppOODf0Aft2rW79MIIvB0soXvqN+3DEcnGUT8++OADl/VhCdw54VUN6B36HjN2PDcFuTCrB5njPuS0adOoQYMGYiBD/7myG0QAwjfgDip0GK+GIKIQEnQb4QA1PNAG7m+iTUTigaeD0HwgL+Rx5UFCBkQOgt5Bpvvuu+8KfQeOwBX9g2+FzsFm4IUgqhEwhT0jko/2qgl+g83j29zJiRUcTL5hb+np6aJu2IKj7uE7kQ844VkzRA3CxBZjCmwACQHatYhPrr4HdoXvQsJ4gVCJyIfvgW0juhJ0CR4kwhkiOhHwhjzDhg277E4syjjqG3QCKzcIJdi8eXNRFjoNXBBcHiELEcRBC8jvSi9gc+7GNniQ7sYk4A6ChmeMYBEYG/ft2yfGPo0ggSuwAca4K6xFcWIP0iRRagSJcFWfffaZMDYQF8CH8Q0ZMkSEswI5giBAJFiSgIcJjxNG8ssvv4jwX1B2GDU6Ef+OSCZQLrwwD+PFYAfiA5FBsUBuCxcuFAMkgjxD6UAokAH5oCwwYhAr4lFiRqgRjfa5KA/jBcFCHhggyAWDCpb9sIwIgwPRYJl1ypQpgpCg1JgQgFyg3MABdbt6UBdyoDyik2CJCbLju+DlwSAwSUC8UESBQdvwCoEbJgswcASj1pZkQG54jBikBAyhwDAsKDYmAMAYAzXkxxIrJh3IBzwRCg3lMSHAG4zaEqu7/sF3uZMXRI3yKAv8sVqgJT3fC6w0eSEzvF49/Y3BFoQPckN/YNBDf4OwkYAjJikYNDG5wXdDHgxyIEkMEsAYOgq8MRBoOIBwoGv4Ziyho++hU4gZ6w4Hx0v6iGnqqB/QZ1f1aY9Ja3jh/yEXZEV7IC5sTUBmfCMGKugOdAsDNSZuruwG3hl0FvoFEsDqTYsWLQThQE+x3A6yANaQDZMFrLiANKFvwAmDJyadWLZ3JkjoKWRAn+N3yAe7h0yOCQSDb0K/Nm7cWEz6tPB/wBETF0zioLPaChP6D5NezX5cyQlCwqQbkyAExMfqBfpJCwKvyaDlA4ZoFxMPEA4mlegjrBpgsuvuezCugMBh09jCgJ0CCxAHllSBM/BDP0EeBF/AKyiwSUxstQka5MGysrO+QX+BH8Ye6C8mG9Bb7SFrTFBgw5ikuNILfK+7sU17ts15TEI0KHwTxkn0C3QeE0pggP7TCBIY1a5dW4wRsBGMTxi7mCAtEqSr4hjYQSZ4HR5Grc0WFy9eLCJ1wPihTHiZAUqB2RaUBzM0/DuUEoqEhBkgOg8DHwhSe/kd5IB8WiBmPKiL+kCQ2BuFgSA/6keH42FbxwQjx2APw4GcmJ3hbww0MA4YEgZaKAkGUOclVhg1SAcJS4bIp3mE7ggDy7OYSWph1uApoB0YGH4DVoiDiQkGPAVXBIn8IFkM8JhwYOIBmSELBgZ8AwgSAzQCc2uhyzD5wOwaHhiMBPW46x9gjAHNlbyIbQojcrXE6kyQrspjQNHkxSCjt78hP3QEM2jsfWKQAY7ACUSJb8Egi2ejgAs8GezLQvcwMCKf85KXRpDQOwwgGoGhH4EfyNwdDtAHx+SoH+7qc95/wmQGRKvtZUGn0S50ErqJ+KyYxGFSgzwI5+fKbuC5wwuH/MAXeMAGMJHDQIeVENQJskC/Y2DEpBOkDu9HiyOrBVx3Jkh4eJiwwX4RdQerKPBAMPl0TLBT6C4mFkj4PjxQjfo1gtSeJoOHjIEa5I1+wPe6kxODNCaPmi5jYgbPzXlpEASJGLXoayToBGwCcmpB6GFfWClw9z2OS6zox717914iYmAMUsSzZI7tYOIAAnWWx1nfoL8oi9UP9BV+xyQCBIlxC32DPvKkF+7GNtieqzEJ/Y+4yBomGC+x0oDVCegSCBITAdSLyRVWFOBQwI7QZ0yQFgkSCoUBE0YJbxHeD8gMJAMycE7wbDD7w0AHo3VM8NhAco4PzaLjoej4gxkvFAoJxgrvRVs+gXLiJXkoGgwS5AOZsHSE2S6MxzFhSQoDCwYlDIggUAwEMHwsn2BA0gZ8VwSpHd5BnZjVQz5nZXImDI3cUQazYeQHCcPTxcChBczG7BGzVUeChDLDQ9JLkFgqgmem4aN9u3Y4BRi56x8QNozDlbyYweslSFflsXSlESQw1tvfzodqsPKAwQSyahMpDPiQDQMAVh4wQwfBYJD2RJCYhGFwwECFBGzg6UA2dzhgQuSOIN3V53yADFg4xlZFfVpsWk2vsFyoER/sw5XdQFbnYOkoD8LD4Ig/zgleMiZSIFF4qUjAD6HsnAkSS4I4kIaJBuRDgg07EyRsAHFvtWVMbQnPkSCh9/hukCm8W0xCQFraW5mu5ER5TIYGDRokfkb/gjCd5YSNY+KIAR8J7eBJL0wI4DHhe/E37Nvd9zgSJPoR5eGBOSbndjBhwHjiLI+7CRnqgn3D/qETznuBnvTC3djmfEBOG5OwkoAJJSa0SNAn7cFsTFJAkFgVw4QSkyas1CBBv7Ux6grlMfgPv+tDOs57kJhNwXOBkoJEYDBQfiQYH9bysfyCWSaIEJ4blB1LF+gQkCCIE/lAcDhwgg7EQOJIkHiuBx6C9j4e9gawXIpZJogOHQxjALFgFoUBEvt0zoMaZnMYaDH4QNHhcWLfCcuTngjS8RSrXoLEN8KzRtKUD8to8KpB5FjuwoCPgQ2eGgYg7bQpvCN8g16CxBIWPEYNHyw3oV/wfZix49/d9Q+8KRCDK3mNEKSr8o4ECT3Q29/OhxygUyA/zfAxmGGAxz4OZsfYw9G8fwzAnggSy3yYQcNjQ4K+wdsDgbvDwRNBuqtPe8tS00FtuQwEh4Rlfgze6H/0l/ZIMva48dQWdMOV3cDLQZ9Cf7DHhATbwmQPy8mwL827gXcATw36i3ZgX+gDJEzUsCLhPNBjIoXlN+z3woPCMh48RWeCBIawBSyhI6GPUJ8jQYJENQ8QS73w8GC7WFp0JycI3PHUqCeCdMwHXcMEAHbtSJAgcHff40iQmMhpE3N8DzAGgYCoHdvRS5COh3Q8EaQ7vYBH625sg9fpakwCIWIcwMoEkoYDxkmcVQBBYv8UugCnBsvMSNAL6DF7kAZnA1p2d4d0ADo6EQaKjoYXgxkwZrEgHXQAlr2gfBigMfBjRqadHIMSoLNAWDAseFOoD3uMjgSJAQTeKgZFLHGCmKEM6Hh4ZTAC1IuBFd4kZt7OJ+i0h37RHgZUGANIGQMN9jAdCRJtQGEw4DgfqbZCkCAqHH6AnNgrQxtQTsgEbPAHAw8GG8imlyCxdAd8gDW+C/sPwB7evba06K5/0E/uiAF7ZfC2MVCjvGNy9pi9ESTIVm9/6yVIbakKAwEGQe1JJxAklqHxB4SJ2bKGA3QWuEJPsEyFgQaECx3US5CO+uGuPizrOyaQApYssaeFWT6Wt7CPCP0DAWP/CB4xlubg8UAuV3aD78CSM7DEIIdJA8gLfYTBEb9DjzSdgr4BA9SJiQDODWDfS3s5xZkgMfiCeLEaAX3FUiOWctG/jgkyYOkT7WGfEfhBp50JEl4jJiFIIH1MyLDi4U5OEJAvCRJyuvsejCP4hj/84Q9ivAI+6B/sKWLLABNu6JAegnTWN70E6U4voLPuxjaco3BFkBhP0F/YgsIWDlZKsFqFPnD0XNHHmKRjvMUEAKTJe5AmyRHFNILU9pm0qjALx34CSBFKBCPEKUok7CuC6ODJYXYMkkNCx6AcBhnkxzo9lnKwRIalVOTFjAszTW2JFWSBo8hQZiS8to72MEBqSynwDFEHiAEE6Jy0pTTsVYLk0BY8EAweSI7LFhqZYkYKz8JZGUFqmgeitaOVx4zX1UCLdkFYIC9ghG8GFvAWcBgFAxpIH/8Og4aROhMklqnQjkZG2h4kjAEeAWaISCgPuaH8GjHAy3TVP5jZu5NXO/yCdp33IfV8L8jeUV69/a2XIOGZgxQ1nQOhY5kJXg+Wy+AhwPPE4KDhgOUxzXPQ9BEDNnTKEw6O+uSoHxisXNWHSZdjAonAO0Mfa+1igMLSnObBwlMDSWLpEB4oZvSu7AZLptAbLQFnDMhI0FlsfyDBJrDqg3bhweOhaExONR0BNpq3odUFLLS68Dt0FhNeyOLoSaM+EB9IGe3AzoEhPHrne5Dw7rBF4uhVu5PT+d4h7AltOcvpnA9toB/Q146eE8YSd9+DVRZsbcBrhycMXcJeKWwQ3jPGNed2QOAo5ywP9ikd9c0dQTrfR3SnF9Bld2Obq0k7xiRMmDCuoU58A/5gPMUk17FdbXzFeKn1HfYhncc0M5Txu1xi1QsUjA+khRkylnwcEwZ8GL7z80LoTCgXZqnOl9Gd28XxenhL2tq59jtmSagDpOuryCCoE3uoRiJo6MEJy1A4qITZtvN3YPDGgSPH06J66tTygCDhlbi7F+mpfzy1g/0oeDi+SEb6W097mDzBS8KAhr6HjmGwxoEUfC8ISTsEptWHCR8ODWGCAj01EyXJUT/01gdZQeDw4DC50dOuO7vRntGCPTn3DbYjsHzr/AwZ8MChFZRxxsQRa+goSAmYQkbUh1PojraAiTEmdmgD2wYgdkwK4KHqTe7k1Ftebz5P3wN9gfza01/w8jEpMGOD7vTNm5ye9MLM2IblazgQ6Bt342FiYqJY0cD+svN2lDd5Pf3OBGkFPS7LCDACVQIBLCHjHAA8D5A4DkrBs3Veiq8SH8sfoRsBJkjdUHFGRoARqKoIwOvBUj8OAeHsAa5wYM+M0+8bASbI33f/89czAowAI8AIuEGACZJVgxFgBBgBRoARcIEAEySrBSPACDACjAAjwATJOsAIMAKMACPACOhDQKoHiZcVcJTa3bFbHO3GnS9PR7S1lzF8fT1BHzycixFgBBgBRuD3ioAUggTx4TQYjkkjbqFz5HyQHi6Y41I3EkKTIXIDLsVqhIgLx7izhIuiiOPp7U7h77UD+bsZAUaAEWAE5CAghSARGg3hfnBRHBEmnAkSIc9wwRhhqXC8GuGQEDkCYYUQKgqXdBGGDREecKkb/86JEWAEGAFGgBHwJwJSCFL7AIQ0Q0g1Z4JEEHBEnUBUEERGQIw9xPBD7D28XoBIEQjZhnBjCC/mq2gy/gSW22IEGAFGgBFQG4GAECQeP0X0f4RxQjBvRLDAKxjwJuFBIvgsYn0iTBle0cjIyBDBkOFZIiHeIP44JoQjQ2xGTowAI8AIMAKMgBEEEBdbezLOsVxACBKBl+EVYhkWCXEcscyKVwiQEFdPewcRAZDhgSLwLF67cBfvEYGend+sMwKQv/NiUoD9WZWSajKrJi90QTWZVZOXMfbPiKOaXriT168EiXfdENsQZIinZxBBHkG58T4inpjSTqoi4j2WZvHkDw77IJI8XofACw7ulluZIOUrflVRevlImW+BMTaPnd6SjLFepMznqyoYSydI7WkSQA0PEQ+s4uFdLIfitQZ4hNhrxFMoSNibxJt0ABhPtcDLgvuL06zaO2yuuo0J0rwy6y1ZVZRe7/cGIh9jLB91xpgxdkYgIB6kt27ACVW8Su94xxHPtSDh340kJkgjaJnLywOLOdyMlGKMjaBlLi9jbA43I6WqCsZSPUgjgFrNywRpFUHv5auK0nv/0sDlYIzlY88YM8ZKeJC+7CYmSF+i6bouHlgYY70Di3ykzLfAemweO70lfY1xUGg4RddsQeVFF6go85BeMXTns+USq27pdWRkgtQBksUsvlZ6i+J4La6avPgg1WRWTV7G2KvZ+CSDr/QiLC6Nouu0paj0phQUEkZl+efp7Ka3fSKjYyVMkD6H1FqFvlIga1IYK62azKrJy4O3MX00m1s1vVBNXqt6HBQcSlHpzSi6TmsKi08X3VxZUUZFp/dT/i+7qDT3pNmud1uOCdLnkFqr8Pem9NbQMleaMTaHm5FSjLERtMzl/b1gHBqTTNG1W1NUzQwKDv01Tjc8xoJfvqOCk3upsqzYHIA6SjFB6gDJn1l+L0rvT0yd22KM5aPPGDPGrhDQrRdBwRSZ1phiarem8OS6/99bLKeizINU8PN3VJLzi3yAPWxt8ClWv8B/ZSO6FShA8llSepvIzBjL7wjGmDH2NlbAG8T+IQWHir+DQn79OyK5HkXXakHB4TG/eosF2VRwYjcVnthDFaWF8oF1aIE9SL/C7b0xHli8Y2Q1B2NsFUHv5W8YdA+tXbfee0Yb5ejRowetW7fORhJ5FiWg8gYFUbAgtXDCSVLxN/4/KOjXvwX5hYslUfwejDyhERQRFUtllcG6MC46c4AKftlFxVnHdOWXkYkJUgaqFurkwdsCeDqLMsY6gTKRLTL1KkpqPchESS7ye0OgsqyEKitKqbK8TBy2EX+Xl1Jx1lEq+GU3VZTkBxwSJsiAd8HlAvDgLb9DGGN5GOMwRUKzvhQVXEq550/La0hCzQhdefbsWQk1y6ky0PKCzCpAcuX4U3rpsMylfy8rpgr8Vlos8iDvgD/0p88++UgOIBJqZYKUAKqVKnnwtoKevrKMsT6czOSKqdee4pv0pFoRF+ibpW+YqSJgZVTTC9XkRceqJjMTZMDM0XXDqilQVVJ6m6nCZeKoohex9TtSXONuVDsyl7YvedPOkF4hmyoYa4KrJm9VGiv4FGuATJuVXj7wjLE8jGMbdqa4RtdT3cgc2rpklryGJNSsml6oJi8TpASltVolh5qziqD38qoZqmryqjSwxF3VlWIbdKJ6kdm0Zclb3pXHRjlU0wvV5FVJj7156exBBshwWenlA88Yy8M4vkkPiql3LTWIyqJNi2fLa0hCzarphWryMkFKUFqrVbIHaRVB7+VVM1TV5FVpYIlv2pti6rSlRlFZtJEJ0rvxWMjBemwBPJ1F+ZCOTqD8lY2VXj7SjLE8jBOuuYGia7Wkq6LP01eLfP+6gjzJq84JS5kYWa1bNdtjgrTa4z4tytvsAAAgAElEQVQur5oCqeTdeNtX8HFX+rQ6VfQiMeMmiqrRnJpEn6P1i97xKQayK1MFY9Zj2ZrwW/1MkP7DWldLqhkpE6SubrWcSRW9SGwxQLzR1zTmLK397F3L3+3PClTBmAnSf1rBBOk/rHW1pJqRMkHq6lbLmVTRi6RWt4pXGJrFZNKaz96z/N3+rEAVjJkg/acVTJD+w1pXS6oZKROkrm61nEkVvUhuM5giUhpQ85gz9OVncyx/tz8rUAVjJkj/aQUTpP+w1tWSakbKBKmrWy1nUkUvqrW7XbzflxF7mlZ9+r7l7/ZnBapgzATpP61ggvQf1rpaUs1ImSB1davlTKroRbVr76TwxNrUMvY0fcEEabnfPVWgik44foNqMjNBSlVh45WrpkBMkMb72EwJVfQipeO9FBafTq3jTtHnC+ea+dSAlVEFY/Yg/aciTJD+w1pXS6oZKROkrm61nEkVvUi57n4Ki02lNnEnafnCeZa/258VqIIxE6T/tIIJ0n9Y62pJNSNlgtTVrZYzqaIXqZ1HUGhMMrWLP0FLF3xg+bv9WYEqGDNB+k8rmCD9h7WullQzUiZIXd1qOZMqepHWZSSFRCVQ+/hfaMmCDy1/tz8rUAVjJkj/aQUTpP+w1tWSakbKBKmrWy1nUkUvqnd7iIIjYqhD/M+0aIE6L8ezHltWUV0VqKLH3iYh/JqHru72fSbVFIgHFt/rgKsaVdGL6j0eoeCwSOqUcJw+/eRj/4Djo1ZUwdjb4O0jOKRUU1UwZoKUoh7eK1VNgZggvfepL3KoohfpvcZRUEgYXZdwnBYyQfqi693WoYpOOH6AajLzEqtUFTZeuWoKxARpvI/NlFBFL2r0GU8UFEzXJx6jT+b/z8ynBqyMKhizB+k/FWGC9B/WulpSzUiZIHV1q+VMquhFjb5PiG/tkniU5s+fb/m7/VmBKhgzQfpPK5gg/Ye1rpZUM1ImSF3dajmTCnoRFBxK6b0fpcqKMuqa/AsTpOVe91yBCjrh/AWqycwEKVmJjVavmgIxQRrtYXP5VdCLoNAISu85lirLiqlryikmSHNdrbuUCjrBBKm7OwOT8ZtvvqH27dsHpnETrbLSmwDNYBHG2CBgOrMHh0dT9e4PU0VJAXVLy2SC1Imb2Wysx2aR01+OPUj9WPklJyu9fJgZYzkYh0TGUVrXUVRedJG6p59ngpQD86VaWY8lA0xETJDyMTbUAiu9IbhMZWaMTcHmtVBodBKlXv8AlRfmUPcaOUyQXhGzloH12Bp+ekozQepByY95WOnlg80Yy8E4NDaFUq8bRmX556lHrYtMkHJgZg9SMq6O1TNB+hFsPU3x4K0HJWt5GGNr+LkrHRaXRimd7qPSi5nUs04BE6QcmJkgJePKBOlHgI02xYO3UcSM52eMjWOmp0RYQk1K6XA3leaepJ71Spgg9YBmIQ/rsQXwdBZlD1InUP7KxkovH2nGWA7G4Ul1qFr7O6gk+2fq1aCcCVIOzOxBSsY14B5kbGwslZWVUVFRkdtPrVWrFuXk5FB+fr7LPKgD5VGPp8TXPORrk2qEo5q86EEVZI6oVp+S2/6Jis8fpd6NiAlSsumpoBPOEKgms189yKSkJOratSuNHz9eGM+0adNcqlDLli1p9uzZ9OKLL9LChQtp+vTppBHiqFGjqKKighYvXkxDhw4VJMoEKdkSvVRfVZQ+sCh6bl0FjCNTG1FS69uo6OxP1KdxCBOkZIVSQSeYIA0oQZ8+fWjYsGHUqFEjmjt3rkuCjI6Opk8//ZTi4uLopZdeoiNHjgii7N+/Py1atIimTJlCDRs2pJo1a9Lzzz/vtXX2IL1CZDmDaoaqmryqeJCR1a+mpJYDqSjzIPVpEs4Eadmy1J80MUGaUILPP/+cli1b5pIg582bR+vXr6fbb7+dXn/9deFBrl27loqLiykmJob69u1LCxYsoEGDBlFJSYnX1pkgvUJkOYNqhKOavKoQZFSNaygx4w9UePoH6ts0ignSsmUxQUqG0Gv1fl1i1aRxR5Bjx44leJkDBw6k1atXXyLI0NBQ4UGuXLmSJkyYQKdOnaJmzZpRRkYGzZgxQ3iWSCNHjhR/nNOkSZO8AsEZGAFGwBoCZ0pi6VBBClUPz6PG0eesVcalGQGbIODqVRqpDya7I8gNGzZQeHg4lZaWUlRUlPj7ueeeo+XLlwuoEhMTac6cOTRr1iwaPXq08ECfeuop6t69u9iXdJXYg5SvZap5ZKrJq4oHGV27NSU060sFv3xH/TIS2YOUbHqsx5IBDlSoOWeCHDBgAIEca9euLZZRkSZPnkxLly6lN998k3Jzc8W/TZ06VSzNIh8O+4wZM4ZWrFhBvXr1crvcygQZOCWS37K5FnhgMYebt1IxddtR/NW9KP/4DrqhZTUmSG+AWfyd9dgigDqKB2yJFeSHPUak7du307hx4+jrr7++JDKWU2fOnCn2IJHq168vSBMCR0ZGCuNLTU0Vp1knTpzo9lOZIHVogcUsqhmqavKq4kHG1u9AcY27U97RbdS/dRoTpEW78lac9dgbQtZ/DwhBmhE7LS1NFMvMzDRUnAnSEFymMqtmqKrJqwxBNryO4hp1obzDm6l/2xpMkKasSX8h1mP9WJnNqQxBmv1AJkizyOkvp5qhqiavKgQZd1VXim3QiS7+tJFubFebCVK/CZnKyXpsCjZDhZggDcElPzMrPWPsCgEV9CK+SQ+KqXctXTi0nm66th4TpGRVVkEnnCFQTWYmSMlKbLR61RRIFe/GsR8YY6NaqS9/fNPeFFOnLV04sJpu6tiICVIfbKZzsR6bhk53QSZI3VD5JyMrvXycGWM5GCc060fRtVtR7g8r6Q/XNWGClAPzpVpZjyUDHKhrHvI/67cWeA9SPtqqGapq8qripSc2v5GiamZQzt7P6ebrmzFBSjY91mPJADNBygfYaAus9EYRM56fMTaOmZ4SiS1upqj0ZpSzZynd3DWDCVIPaBbysB5bAE9nUV5i1QmUv7Kx0stHmjGWg3FSq1soMq0JZe9aRAO6t2KClAMzL7FKxtWxeiZIP4KtpykevPWgZC0PY2wNP3elk9v8kSJSGlLWtwtoYM92TJByYGaClIwrE6QfATbaFA/eRhEznp8xNo6ZnhLJ7YZQRHI9yto5nwb2upYJUg9oFvKwHlsAT2dR9iB1AuWvbKz08pFmjOVgXK39nRSeVJvOf/Mh3dLnOiZIOTCzBykZV/Yg/Qiw0aZ48DaKmPH8jLFxzPSUSOlwD4Ul1KBz2+bSrf26MEHqAc1CHtZjC+DpLMoepE6g/JWNlV4+0oyxHIxTOt1HYXFpdG7Le3Rr/+5MkHJgZg9SMq7sQfoRYKNN8eBtFDHj+Rlj45jpKZHaeTiFxlSjs5vepkE39WKC1AOahTysxxbA01mUPUidQPkrGyu9fKQZYzkYp3V5kEKiEilz4yy67ea+TJByYGYPUjKu7EH6EWCjTfHgbRQx4/kZY+OY6SmR1m00hUTEUuaGGXTbwBuZIPWAZiEP67EF8HQWZQ9SJ1D+ysZKLx9pxlgOxtV7jKHgsCg6s24a/fHWm5kg5cDMHqRkXNmD9CPARpviwdsoYsbzM8bGMdNTIr3nXygoNJxOr/0vDR50CxOkHtAs5GE9tgCezqLsQeoEyl/ZWOnlI80Yy8E4vfdjFBQcQqdXv0KD/3gbE6QcmNmDlIwre5B+BNhoUzx4G0XMeH7G2DhmekrU6PuEyHZq1WRijPUgZi0PY2wNPz2l2YPUg5If87DSywebMfY9xvAc4UFWVpQLD5Ix9j3GzjUyxoHDOKhdu3aV8puX3wK/BykfY9UMVTV50YN2lxl7j9iDrCwrEXuQdpfXlVWoJrNq8qqgx3onIUyQ8nnFZQus9PKBZ4x9jzFOr+IUa0VpoTjFyhj7HmO9g7f8ls23oJpe8BKr+b6WUlI1BapKs0IpHeqjSu2uF7j/iHuQ5cV54h6k3eVlD9JHimmwGtX0ggnSYAfLzq6aAjFBytaIX+u3u14ggg4i6ZQX5lLmxjdtLy8TpH/0VnWvlwkyMHritlW7D4Q8sARGYeyuF4jBilisZflZdHbTbCZIP6iJ3XWiKo8VvAfpBwWvCgqkgnej+ixWBYzxigde8yi9mCle8+DBW/4AwhgHDmMmSPnYu2yBlV4+8Iyx7zHGO5B4D7I095R4D5Ix9j3GPNGTj6lejJkg/d8XSuw1VQWvlwdv3yt3eFJtqtb+TirJ/oXOf/MhE6TvIb6iRtZj+SDzHqR8jA21wEpvCC5TmRljU7B5LBSRXI+S2w2h4qxjlLXjf0yQvoeYCdIPmLIHGQCQjTTJg7cRtMzlZYzN4eapVERKQ0pu80cqPneYsr5dwATpe4iZIP2AKRNkAEA20iQP3kbQMpeXMTaHm6dSkWlNKKnVLVSUeZCydy1igvQ9xEyQfsCUCTIAIBtpkgdvI2iZy8sYm8PNU6mo9GaU2OJmKjy9n3L2LGGC9D3ETJB+wJQJMgAgG2mSB28jaJnLyxibw80jQdbMoMTmN1Lhye8pZ+/nTJC+h5gJ0g+YMkEGAGQjTfLgbQQtc3kZY3O4eSoVXbsVJTTrRwW/7KLcH1YyQfoeYiZIP2DKBBkAkI00yYO3EbTM5WWMzeHmqVRM3bYUf3Vvyv95J13Yv5oJ0vcQM0H6AVMmyACAbKRJHryNoGUuL2NsDjePBFm/A8U37k75x7bThYPrmCB9DzETpB8wZYIMAMhGmuTB2wha5vIyxuZw81QqtkEniruqK+Ud2UIXf/yKCdL3EDNB+gFTJsgAgGykSR68jaBlLi9jbA43T6XiGnWh2IbX0cWfNlLe4c1MkL6HmAnSD5gyQQYAZCNN8uBtBC1zeRljc7h5JMjG3Sm2fge6eGgD5R3dygTpe4iZIP2AKRNkAEA20iQP3kbQMpeXMTaHm6dS8Vf3opi67ejCgTWUf3wHE6TvIWaC9AOmTJABANlIkzx4G0HLXF7G2BxunkolNOtL0bVbU+4Pq6jgl++YIH0PMROkHzC1BUHGxsZSWVkZFRUVufzkmjVrUmZmpsjjLqEOlPeUB2W/+eYbat++fQCgNdckD97mcDNSijE2gpa+vAnN+1N0zRaUu28FFZzYwwSpDzZLuViPLcGnq7BfX/NISkqirl270vjx42n+/Pk0bdq0y4TMyMig6dOnC+KrqKigw4cP00MPPST+TSPEUaNGid8WL15MQ4cOpZycHI8fygSpSw8sZVLNUFWTF51jd5kRZg7h5nK+X0aFp/bZXl5XCm93jPV6N5aMWXLhqoKxlPcg+/TpQ8OGDaNGjRrR3LlzryBIEGFYWBg9+OCDghDXrVtH//rXvwRJ9u/fnxYtWkRTpkyhhg0bErzM559/3mt3MkF6hchyhqqi9JaBkFiB3TFGoHIELM/evZiKzhxggpSoC1rVdteJqjwJkUKQGmCff/45LVu27AqCjI+PF95hXl6eIEV4iJ07d6bVq1dTcXExxcTEUN++fWnBggU0aNAgKikp8aqGTJBeIbKcQTVDVU1eFTzIpNa3UWRqI8r+biEVnf2JCdKyVXmvgPXYO0ZWc/h1idUbQeL36Oho4SW2bdtW/P3BBx9QaGio8CBXrlxJEyZMoFOnTlGzZs0IS7IzZswQniXSyJEjxR/nNGnSJKs4cXlGgBHwgMD3edUppyyKMmLPUGJoIWPFCFQZBLAd6JwC4kFiWXXhwoVUXl5OY8eOpUOHDl0mV2JiIs2ZM4dmzZpFo0ePFh7oU089Rd27dxeep6vEHqR8PVVtJquavCp4kNXa30HhSXXo/DcfUUn2z+xByjc7xjiAGPuVIAcMGEAbNmwQh3c6depE/fr1c/npU6dOFUuztWvXFod9xowZQytWrKBevXq5XW5lgpSvRaoRjmryqkCQKR3uprCEmnR+2zwqyT3Jg7d8s2OMA4ixdIJcunQpvf766+ITt2/fTuPGjaPHH3+c6tSpc9lnYy9y27ZtVL9+fZo8ebJQisjISHEKNjU1VZxmnThxoluomCDla5FqhKOavEoQZKf7KCwujc5tnUOlF87w4C3f7BjjAGIslSDNfFdaWpoohvuRRhITpBG0zOVVjXBUk1cFgkztPJxCY6rR2c3vUFneOR68zZmSoVKsx4bgMpU5IId0TElqshATpEngDBRTzVBVk1cFgkzr8iCFRCXS2a/forKCbCZIA/ZjNivrsVnk9JdjgtSPlV9ystLLh5kx9j3GaV1HUUhkHGV+NZPKiy4yQfoe4itqZD2WDzITpHyMDbXASm8ILlOZGWNTsHksVL37wxQcHk1n1r9OFSUFTJC+h5gJ0g+YOjfBBBkA0D01yYO3/A5hjH2PcXrPsRQUGkFn1k6lirJiJkjfQ8wE6QdMmSADALKRJnnwNoKWubyMsTncPJVK7/0oBQWH0unVU6iyoowJ0vcQM0H6AVMmyACAbKRJHryNoGUuL2NsDjdPpWr0fUL8fGrVZPE3Y+x7jPUO3vJbNt+CanrBS6zm+1pKSdUUSMXBkDH2seoGBVONPuOJKivo1JcvM0H6GF531bEeyweaCVI+xoZaYKU3BJepzIyxKdjcFgoKCaP0XuOosryUTq95lQnSt/C6rY31WD7QPiXI8PBwXS9syP+s31rge5Dy0VbNUFWT1+5eenBYFFXvMYYqSovozLrXmCDlmxxjHGCMdUXSQczUv/3tb+Lpqc8++4yioqJo1apV9PTTT/tJfO/NMEF6x8hqDtUIRzV57U6QIRGxlNZtNFUU59OZDdN58LZqUDrLsx7rBMpCNkseJAKMIy7q2rVrqXfv3lRUVEQRERF0/fXX28aTZIK0oB06i6pmqKrJa3uCjEqgtC4jqbwwlzI3vskEqdNurGZjPbaKoPfypgkST1OtW7eO3nrrLRoyZAjh/4cNG0bvvfeeeLNxzZo13lv3Qw4mSPkgq2aoqslrd4IMjUmm1M4jRIg5hJqzu7zuLEI1vVBNXhX1wjRB4mM3b95MFy5coGrVqtHRo0cpKytLPHTct29fys7Olj8y62iBCVIHSBazqGaoqslr94ElLDaVUq67n8ryztLZze8yQVq0J73FWY/1ImU+nyWCxF7jbbfdJlqH1zhp0iQ6ffo04X1HuyQmSPk9oZqhqiav7QkyPp1SOt5LpRdO07mt7zNByjc5xjjAGOs6pAMZa9asSWVlZeIZqhtuuIG++OILP4murxkmSH04WcmlGuGoJq/dCTI8sTZVu/ZOKsk5Qee3f8CDtxVjMlCW9dgAWCazmvIgFy5cSKGhoW6bvOeee8TSqx0SE6T8XlDNUFWT1+4EGZFcj5LbDaGSrON0fsfHTJDyTY4xDjDGHj1IkI6n1L9/fzp37pyfPsFzM0yQ8rtBNcJRTV7bE2RKQ0pu80cqPneEsr79hAdv+SbHGAcYY48EGRwcLMRbvnw5RUdH0wsvvED79u2jl19+mWrXrk29evUSVz7skJgg5feCaoSjmrx2J8jItMaU1OpWKso8RNm7PuPBW77JMcYBxtjrHiSi5mzatIkOHTpEd955pxD3scceo7vuuouveVjoPB68LYCnsyhjrBMondmi0ptSYosBVHTmAGXvXsyDt07crGZjPbaKoPfypvYgtWpBkCBKLKcWFxeLAzuVlZWEJVZc+bBDYg9Sfi+oZqiqyWt3DzKqRnNKzLiJCk/tpZzvlzNByjc5xjjAGHv1ICFft27d6IknnqD09HQKCgoSB3MQOOCDD349yWaHxAQpvxdUIxzV5LU7QUbXakkJ19xABSd2U+6+X0+xM8Zsd64QUE0vTHuQ8BwnTpxIu3fvpo8++ohSU1PpxIkT8rXCYAtMkAYBM5G9qii9iU/3WxE7YxxTpy3FN+1NBT9/S7n7v2SC9JNW2Fkn3EGgmsymCRIAYIkVnmPXrl3FXUg7JiZI+b1SVZRePlLmW7AzxjH1rqX4Jj0o/9g3dOHgWiZI891sqKSddYIJkoiWLFlCNWrUoPz8fDpz5ozYf0QaMWIE5eXlGepsWZmZIGUh+1u9qhmqavLafckytkEniruqK+Ud2UIXf/yKCVK+yTHGAcZY1x7ktm3bSLvy4SjvTTfdJCLr2CExQcrvBdUIRzV57U6QcY2up9iGneniT19T3uFNPHjLNznGOMAY6yLIhIQElwRpl0DlwJAJUr4mqUY4qslre4Js3I1i63eki4c2UN7RrTx4yzc5xjjAGOsiyJSUFHH3sUuXLiL03M6dO+m1116jAwcO+El8780wQXrHyGoO1QhHNXntTpDxTXpSTL32Yv8R+5B2l9edvqumF6rJq6JeWDqk884771CLFi3E3mNFRQWFhISIh5J79OjBDyabZB1WepPAGSjGGBsAS0fWhKZ9KLpOG3GCFSdZVRwIVZSZ9ViHclrMYpogk5KSaNWqVXTs2DFx5wkE+dJLLwlyHDt2rDjhaofEHqT8XlDNUFWT1+6Dd8I1/Sm6VgtxBxJ3Ie0uL3uQ8seEqo6x1yXWxMRE+vLLL2nHjh305z//WeChhZr7+9//TitWrAhcLzi0zAQpvxtUIxzV5LU74SRm/IGialwjouggmo7d5a3qg7d8izffgmq2Z9qDBEQbNmwQwcpzcnKotLRUBAsoLCyk7t27C4/SDokJUn4vVBWll4+U+RbsjHFSy4EUWf1qEYcV8ViZIM33s5GSdtaJqj4J8epBAoCWLVvSM888Q/Xq1RMBA3AXEi96rFmzxkg/S83LBCkVXiUHQx5YfKsTSa0HUWTqVZT93adUdPZHJXVCRVJnPfatHruqzZIHiQpjYmIoNjZWHNSBJ4lDOnZKTJDye0M1Q1VNXrsP3sltB1NEtQaUtfMTKj5/hAlSvskxxgHGWJcHec8999AjjzxCb7zxBh08eJBeeeUVev/998VVD7skJkj5PaEa4agmr90Jslr7Oyg8qQ6d3/ExlWQd58FbvskxxgHGWBdBYg8yMjJSHM45deqUIEcEMedIOuZ7jwdv89jpLckY60VKX75q195F4Ym16Pz2D6gk59cHCxhjfdhZycUYW0FPX1nTS6zag8m7du0SsVeRhg8fTg899BD985//FHFa7ZDYg5TfC6oZqmry2p1wUjoOpbD46nRu6/tUeuE0E6R8k2OMA4yxLg9yy5YtIoLO2rVrRXByPJQcFhZGt912Gx0//utSS6ATE6T8HlCNcFST1+4EmXrdMAqNTaGzm9+lsryzPHjLNznGOMAY6yJI7EE+/PDDghS1hLuRTz75pJ/E994ME6R3jKzmUI1wVJPX9gR5/QMUGp1EZ79+i8oKsnnwtmpQOsuzHusEykI200usWpt4zeP2228nxGWdO3cu2SlQOWRkgrSgHTqLqmaoqslrd4JM6/pnComMp8yv3qDyogtMkDrtxmo21mOrCHovb4kgO3ToQFOnThXLrLt376batWuL8HOTJ0/23rKfcjBBygdaNUNVTV67E2T17g9RcHgMnVk/nSpK8pkg5ZscYxxgjHUtsW7cuJEiIiKovLyc9u3bJ7zImjVr8ilWC53Hg7cF8HQWZYx1AqUzW/WeYyk4NILOrHuNKkqLePDWiZvVbKzHVhH0Xt60B4kAAevXrxceI0gRJPnRRx/RxIkT6emnn6aVK1e6bR2BBcrKyqio6Fdjck4IhJ6bm+sxXB3qQHnU4ymxB+ldCazmUM1QVZPX7h5keu9HKSg4lE6veZUqy0uZIK0alM7yrMc6gbKQzTRBos3t27eL2KsgKsReRTQdeJFdu3Z1SX4gPvw2fvx4mj9/Pk2bNu0y0evWrUvvvvuuqAsxXpcvX07PP/88TZ8+XUTrQTujRo0Svy9evJiGDh0qovcwQVrQAB8UVc1QVZPX7gRZo+8TQotOffkyUeWvMZgZYx8YlpcqGOPAYaxriXXChAnCEBCHVUvbtm0TdyFdpT59+tCwYcOoUaNG4kCPM0EiAg88U9ynRJ6PP/6Yxo0bR3gdBFdIFi1aRFOmTKGGDRsKrxXk6S2xB+kNIeu/q2aoqslra8IJCqIafR7/lSBX/Xb2gDG2blfeamCMvSFk/XfTHiQO5oDw4MFdc801dNVVV4kTowsXLvQq1eeff07Lli27giBRdvPmzeKQD07HgmwffPBBEcKuuLhYkGffvn1pwYIFNGjQIF1xX5kgvXaH5QyqGapq8tqZIINCwii91ziqrCij06unXNIlxtiyWXmtgDH2CpHlDKYJEgSGQAH5+fnUs2dPQ4K4I0gsqX7xxRf03//+V9SnEeTevXuFB4l9TXitCGvXrFkzysjIoBkzZgjPEmnkyJHij3OaNGmSIfk4MyPACOhDoKwymLbk1qXQoArqlGCP4CD6JOdcjIA+BLAd6Jx0LbGiYIMGDejbb7+l/fv3iz1IJCydenrVwx1Bzpw5U0Tkefzxx8XVEXiTHTt2vHRYB480z5kzh2bNmkWjR48W7Tz11FMe359kD1KfEljJpdpMVjV57exB4noHrnngegeueWiJMbZiUfrKMsb6cLKSy7QHqXl48CSdk7dg5c4EOWDAAPH48l133SX2NHv16iX2HhGyrlu3bpeqx51LLM3iviUO+4wZM4ZWrFgh8rsjZCZIK+qhr6xqhqqavHYmSAQIQKAABAhAoAAmSH0244tcrMe+QNFzHZYI8o477hD3IJ3Thx9+6NWDXLp0Kb3++uuiKE7DghD37Nkjlkux14iDP//6178uBT2vX7++2JuEwHhBBN5ramqqOM2KqyXuEhNk4JRIfsvmWuCBxRxurkohxFzq9Q+IEHMINccE6TtsvdXEeuwNIeu/WyJI6827rqF58+b0008/XXZVJC0tTWTOzMw01CwTpCG4TGVWzVBVk9fOHmRobCqlXnc/leWdo7Ob32GCNGVB5gqxHpvDzUgpWxKkkQ/wlpcJ0htC1n9XzVBVk9fOBIlnrvDcVemFM3Ru6xwmSOvmpLsG1ryWZrAAACAASURBVGPdUJnOyARpGjo5BVnp5eDqWCtj7DuM8VAyHkwuyT1J57fNY4L0HbRea2I99gqR5QyWCBIHdHDiFEHLt27dKu4qItyc0WVQy1/hoQL2IGWi+2vdqhmqavLaGePw5LpUrd3tVJL9M53/5iMmSPnmxhjbAGNd1zxwLaN9+/ZCXLzmgeg3SLgXiXBwdkhMkPJ7QTXCUU1eOxNkRLUGlNx2MBWfP0JZOz/hwVu+uTHGNsDYK0HCe4TXeObMGSotLRXvQP7444/iagbuKOJkqh0SE6T8XlCNcFST184EGZl6FSW1HkRFZ3+k7O8+5cFbvrkxxjbA2CtB4qoFnrvatGkTJSQkiNc8cJcRdxPvv/9++v777/34Ge6bYoKU3w2qEY5q8tqaIKtfTUktB1LRmQOUvXsxD97yzY0xtgHGXgkSMiL0W3Jy8qUIOri7ePHiRcOh52R+LxOkTHR/rVs1wlFNXjtjHFWjOSVm3ESFp/ZRzvfLePCWb26MsQ0w1kWQderUERf8cUgHHiXIES9vwKu0S2KClN8TqhGOavLamSCja7WkhGtuoIITeyh33woevOWbG2NsA4w9EmSbNm0ue+LKWd7vvvuOD+mY7EQevE0CZ6AYY2wALC9Zo+u0oYSmfajgl+8o94dVPHj7DlqvNbEee4XIcgZT1zzglXlK3mKxWpbaQAXsQRoAy2RW1QxVNXnt7EHG1GtP8U16Uv7xHXThwBomSJM2ZKYY67EZ1IyVMUWQeLQ4JCTEbUtPPvkkFRQUGJNEUm4mSEnAOlSrmqGqJq+dCTK2fkeKa9yN8o5upYuHNjBByjc3xtgGGOvag4ScOKQzZMgQ8cDxJ598YqsgAZCPCVK+NqlGOKrJa2uCbNiZ4hpdT3mHN9HFn77mwVu+uTHGNsBYF0Eiig5e9HBMuN6Bax52SUyQ8ntCNcJRTV47E2TcVV0ptkEnuvjjV5R3ZAsP3vLNjTG2AcZeCVJ70BhXOxYuXCietxo4cCBFR0eLdx0PHjzox89w3xQTpPxuUI1wVJPXzgSJ/UfsQ144uI7yj/0WHIQxZrtzhYBqemFqDxIfrkXSOXr0qLgHhzRq1Ch64IEH6O6776YDBw7I1xAdLTBB6gDJYpaqovQWYZBa3K4Y4wQrTrJe2L+a8n/eyd6NVC24vHK76oQnCFST2RRB3nfffeKQDpZXsQe5YMECKisrEx5kWFgYdevWzeODyX7UId6D9APYVUXp/QCV6SbsijHuQOIuZO4PK6ngl11MkKZ72HhBu+rE754g+ZqHcWXWW4KVXi9S5vMxxuaxcy6JKDqIppOz93MqPPlbeEnG2HcYu6uJMQ4cxh73IEeOHOnxmsfs2bPZgzTZd6z0JoEzUIwxNgCWl6yJLQZQVHpTytmzhApP72cP0nfQeq2J9dgrRJYzmFpi1VqNj48n3HmsX7/+JUHS09Np8ODBlJWVZVk4X1TAe5C+QNFzHaoZqmryAn27ypzU6laKTGtM2bs+o6LMQ0yQ8s2NMbYBxl5PsUJGLVi5s7z9+vVjgjTZiXYdCD19jmoyqyavnQkyuc1gikhpQFnfLqDic4d58DZp92aKsR6bQc1YGdMeZHh4uAhKjj+tW7cmxF/FQR0ELschHX4w2VhHaLlZ6c3hZqQUY2wELc95q7W7ncKT61LWjv9RcdYxJkjfQeu1JtZjrxBZzmCaIHHNY9u2beI6R3FxMdWtW5eeffZZevXVV2ns2LG2edGDl1gt64jXClQzVNXktbMHWe3auyg8sRad3/4hleT8wgTp1Vp8l4H12HdYuqvJNEGiwsWLF1PNmjXp448/pttvv/1SG7zEar7jWOnNY6e3JGOsFynv+VI63kth8el0bttcKs09xQTpHTKf5WA99hmUbiuyRJAxMTF077330kcffUSPPPIINW/eXMRjxR+7JPYg5feEaoaqmrx29iBTr7ufQmNT6dyW96j0YiYTpHxzY4xtgLGuQzrTp08XDyYjzBxSq1at6MUXX6R77rmHD+mY7EQevE0CZ6AYY2wALC9ZUzuPoNCYZDq7aTaV5f92cp0x9h3GRpf/5LdsvgXV9MKSB7l161bKzs6m4cOH09///ndxQAeJ34P8/SiQnb0bHljM66HekmldRlJIVAJlbnyTygtz2bvRC5wP8qlGNlVprNDlQU6ePJl69ux5qatLS0vpnXfeoTfffNMH3e+bKniJ1Tc4eqpFNUNVTV47DyzVuz1EwRExlLlhBpUX5zFByjc3xtgGGHskSBzM0RIClMNjxLUOPHOVk5NDJ0+e9OMneG6KCVJ+V6hGOKrJa2uC7PEIBYdF0pl106iitJAHb/nmxhjbAGOPBMmxWOX1EA/e8rDVamaMfYdxeq9xFBQSRqfX/pcqy349i2BnQueVEN/1vZmaVLM9U3uQM2bMIAQKcJdwDzI/P98Mfj4vwx6kzyG9osKqovTykTLfgl0xrtFnPFFQMJ1e/QpVVpQzQZrvYsMl7aoTv4dJiK49SACBqx5NmjS5DJNdu3ZxJB3D6s4zb5OQGS7GA4thyNwWqNH3CfHbqVWTL8vDGPsOY3c1McaBw1gXQU6cOJH69u1LQUFBl0nKp1jNdxwrvXns9JZkjPUi5TlfUHAopfd+VHiO8CAdE2PsG4yrkjeGb1FNL0wtseJDEWpuy5Ytov/27dsn4rBq6S9/+QsvsZq0D9UUqCopvcku80sxO+pFUGgEpfccK/YesQfJBOkXVbjUiB11whsCqslsmSB37NhBo0eP9oZLwH7nPUj50FcVpZePlPkW7IhxcHg0Ve/+MFWUFNCZ9a8zQZrvXlMl7agT3j5ENZlNEySAWLJkCeH9x1WrVl0WOWfq1Kn8YLI3TXHzu2oKxB6kyY42WMyOehESGUdpXUdRedFFyvxqJhOkwT61mt2OOuHtm1ST2RJB4jUPLLU6J96D9KYm7n9XTYGYIM33tZGSdtSL0OgkSr3+ASovzKHMjbOYII10qA/y2lEnvH2WajJbIshbbrmFoqKirsBk4cKF7EF60xT2IE0iZL2YakZq10lIaGwKpV43jMryz9PZTW8zQVpXTUM1sB4bgstUZsMECY8RcVfxUHKbNm1cEuR//vMfKioqMiWQrwvxHqSvEb2yPtUMVTV57UqQYfHVKaXjUPGKB17zcEyMMdudKwRU0wvDBBkdHU0bNmwQJ1gRnJyXWH1rCKopkF0Hb0+9whj7RmfDE2pStQ53i3cg8R4kE6RvcNVbC+uxXqTM5zNMkCDE2267jfbv308NGjSgiIiIK1r/7LPPLrv2YV486yXZg7SOobcaVDNU1eS16yQkPKkOVWt/B5Vk/0Lnv/mQCdKbofj4d9ZjHwPqojrDBIk6nnrqqSuCAzjW/corr/ASq8m+Y6U3CZyBYoyxAbA8ZI2oVp+S2/6Jis8fpayd85kgfQOr7lpYj3VDZTqjKYK0Gqw8JSWFzp0751Zo/J6VleUxXF1sbKwgYccABa4qZA/StG7oLqiaoaomr109yMjURpTU+jYqPvsTZX23kAlSt8X4JiPrsW9w9FSLKYLE6VVXe49aQ7gf6Yq4+vfvT8888wzl5eURCO7555+n5cuXX5IPz2jNnj1bEB9ivGKvE3mmT58u8uPf8bwWntZavHgxDR06VDyv5SkxQQZOieS3bK4FHljM4eZcKrL61ZTUciAVZR6k7F2LmCB9A6vuWliPdUNlOqMpgnRsrV+/ftS+ffvLBHC3xLpmzRqaM2cOvfvuu4QXP2699Vbq1avXpbI4Hdu5c2fxvuSgQYNowoQJ9NBDD9GLL75IINdFixbRlClTqGHDhgQyBXl6S0yQ3hCy/rtqhqqavHb1IKNqXEOJGX+gwtM/UM6epUyQ1k3JUA2sx4bgMpXZEkHivmPdunWvaNhdoICtW7fS8OHDae/evQRiBcHhJKyWGjduTPPmzaPMzExKTU2lr7/+mh577DFau3YtFRcXC68SwdEXLFggCLSk5Lf359x9PROkKb0wVEg1Q1VNXrsSZHStFpRwTX8qPPk95ez9nAnSkNVYz8x6bB1DbzWYJsjQ0FBx1aOgoIDee+89Ki0tvdTWRx995JK8tm/fTnfddRcdOnRI3KGcOXMmdezY8VK5+++/X3iMe/bsESdksUwLIkVb8CBXrlwpvMpTp05Rs2bNKCMjg/A2JTxLpJEjR4o/zmnSpEnecODfGQFGwCACp4rj6KfCapQefpGuij5vsDRnZwTUQGD+/MsPoEFqXc9dIVjATz/9RPfee6+uL12/fr1YLl2xYoVYRh03bpwgQC19+umn9PPPP4vlVxzUQb4hQ4bQ4cOHRZbExESxRDtr1iwRIH3atGniRG337t3dHuhhD1JX11jKpNpMVjV57epBxtRtR/FX96L84zvpwoHV7EFasiLjhVmPjWNmtIRpDxINwaNLTk4WS6L5+fmX2h42bJg4iOOc5s6dKw7VgBhBdIWFhTRixAgaMGCAOJDz7LPPUq1ateiOO+6gFi1aCCLs0qXLJW8UQdCXLVtGtWvXpq5du9KYMWMEiWIf091yKxOkUZUwnl81Q1VNXrsSZGz9jhTXuBvlH91GFw6tZ4I0bjqWSrAeW4JPV2FLBGk0WHnbtm3FsipSeXk5DR48mE6cOEFYegVpYukUy7Xh4eEizxdffCFOvSLVr1+fJk+eLB7cjIyMJLi92KfEaVY83OwuMUHq0gNLmVQzVNXktS1BNryO4hp1obzDm+niTxuZIC1ZkfHCrMfGMTNawhJBgshcXffwFIcVZZo0aSIeWcZ1DVcJvx8/fvyyYANpaWkiK7xVI4kJ0gha5vKqZqiqyWtXgoy7qivFNugkyBEk6ZgYY3O2ZKQUY2wELXN5LREkvEGcLHVO2B90tcRqTkRrpZggreGnp7RqhqqavHYlyPgmPSim3rVieRXLrEyQeqzFd3lYj32HpbuaLBGk0SVW+Z9zZQtMkPJRV81QVZPXtgTZtDfF1GlLFw6sofzjO5gg5ZsaY2wTjHWdYq1Tpw6FhIQIkePj4+nf//43xcXFUc+ePb2GgPPXdzJBykdaNcJRTV67EmRCs34UXbsV5f6wigp++Y4Hb/mmxhjbBGNdBOks69NPPy1e+sBdxJ07d/r5U1w3xwQpvxtUIxzV5LUrQSY2v5GiamZQ7t4VVHByDw/e8k2NMbYJxroIElcztBOnkBsX+pFwtxHBxu2QmCDl94JqhKOavLYlyBYDKCq9qQgzh3BzjokxZrtzhYBqemFpDxKxVXHlAgknUi9cuEB4C/LNN9+Urx06W2CC1AmUhWxVRektQCC9qB0xTmp1K0WmNRaByhGwnAlSuhowxv6FWFwrNB1Jx8+ymmqOCdIUbIYK2XHw9vQBqslrVw8yuc0fKSKloXjqCk9eMUEaMhvLmVmPLUPotQImSK8Q+TcDK718vBlj32Cc3G4IRSTXE48l49FkJkjf4Kq3FtZjvUiZz8cEaR47KSVZ6aXAyoO3BFirtb+TwpNq0/lvPqSS7F8YYwkY80qIn0F1as4wQeIgzoMPPkhffvkl9ejRQzxJhag4dk28xCq/Z1QjddXktesSa0qHeygsoQad2zaPSnNPMkHKNzXG2CYYuz3Fisg5eJXj6NGj4i3IgwcPXnGlA69s6Hmr0R/fygQpH2XVCEc1eW1LkJ3uo7C4NDq35T0qvXh5CEjGmO3OFQKq6YVhDxIfvXHjxkunV12B4O7BZPkqc2ULTJDyUa8qSi8fKfMt2BHj1M4jKDQmmc5uepvK8i9/D9KO8npDXzWZVZPXrhM9M8vYHu9B4omrPn360OOPPy68R9yHdEyffPIJe5DerNHN76z0JoEzUIwxNgCWh6xpXUZSSFQCZW6cReWFObz85xtYddfCeqwbKtMZTXmQWmt4dSM7O5tuuOEGEWLuww8/FO892imxBym/N1QzVNXktevMO63baAqJiKXMr2ZSedFFJkj5psYY2wRjXZF08LDx+PHjKSgo6JLYa9eupSeeeMLPn+G+OSZI+V2hGuGoJq9dCbJ6jzEUHBZFZ9a/ThUlBTx4yzc1xtgmGHslSLwDiaXVsLAwWrlyJRUWFtKNN95IUVFRNGjQIPr555/9/Cmum2OClN8NqhGOavLalSDTe/6FgkLD6fTa/1JlWQkP3vJNjTG2CcZeCRIveXz66aeEcHMTJkwQYg8dOpTGjh1L//znP2nJkiV+/hQmyEABrhrhqCavbQmy92MUFBxCp1dPocqKMh68/WyArMfyATe9BwkPcuvWrcJzfOmll8QDyVhurV69Og0ZMoQOHz4sX3odLbAHqQMki1lUM1TV5LUrQdbo++tWyqlVk6/QIMbYolHpKM4Y6wDJYhbTBIl2J0+eLN5+dEy7du2iESNGWBTLd8WZIH2HpbuaVDNU1eS1I0HCc0zv/RhRZQWd+vJlJkj5ZsYY2whjr0usmqwZGRl03333UWxsrDjF6nzlIwDfdFmTTJDye0A1wlFNXlsSZGg4YQ+ysryETq/5Lw/e8s2MMbYRxroJMgAyG2qSCdIQXKYyq0Y4qslrR4IMDo+m6t0fporSQjqzbhoP3qYsx1oh1mNr+OkpbWmJVU8Dgc7DBCm/B1QzVNXktSNBhkTGUVrXUVRenEeZG2YwQco3M8bYRhizBxmAzrDjQKgHBtUIRzV57agXIVGJlNblQSovzKXMjVc+kM4Y67Eca3kYY2v46SltyYNMSUmhmTNnEq587NixgyIiImjevHni6oddEnuQ8ntCNUNVTV47EmRoTDVK7TycyvKz6Oym2ezdyDczxthGGOvyIJcvX05paWlC7N27d1P9+vXFYR2cbM3Pzw/A51zZJBOk/G5QjXBUk9eOBIlXPFI63UeleWfp3OZ3efCWb2aMsY0w9kqQ4eHhtGnTJgIBIXpOeXk5rVixQgQNwB+7eJFMkPK1SjXCUU1eWxJkQg3Ce5ClF07Tua3v8+At38wYYxth7JUg8XDyli1b6Pjx41RaWkplZWUicHmnTp3o5ptvptOnTwfgc9iDDAToqhGOavLakSDDk2pTtfZ3UknOL3R++4c8eAfA8FiP5YNuaQ9y7ty51LRp08ukPHnyJA0cOFC+5DpbYA9SJ1AWsqlmqKrJa0eCjKhWn5Lb/omKs45R1o7/MUFasB+zRVmPzSKnv5wlgkS4uf79+9PgwYNFiDmEl/vHP/5hqyevmCD1K4PZnKoZqmry2pIgUxtRcuvbqPjcYcr6dgETpFnjsVCO9dgCeDqLWiLIMWPGiP1H54TlVQQyR3zWQCcmSPk9oJqhqiavHQkyMq0JJbW6hYoyD1H2rs+YIOWbGWNsI4y97kFC1m3bthG8SFepqKhIeJeBJkkmSPlapRrhqCavHQkyKr0ZJba4mQpP76ecPVe+3MMYs925QkA1vbDkQeJJK1zzWLp0qTiog71HHNY5ePAgtW7dmqZOnUpz5syRrykeWmCClA9/VVF6+UiZb8FuGEfVzKDE5jdS4am9lPP9cvZuzHet6ZJ20wk9H6KazKYJUrvmceDAAbr77rsFNq+99hpdd9111Lt3b/ryyy8FceJtyEAmJkj56FcVpZePlPkW7IZxdO1WlNCsHxWc2E25+75ggjTftaZL2k0n9HyIajKbJkiAsX37dgoKCqLPP/+cSkpKhAdZWVlJr7/+Oj3yyCP0wgsviL3IQCYmSPnoVxWll4+U+RbshnFM3bYUf3Vvyv95J13Yv5oJ0nzXmi5pN53Q8yGqyWyJIMeNGye8R5AkEsgRoeYSEhKoT58+dOONN/IepB6tccijmgJBdNVkVk1eO2IcU78DxTfuTvnHttOFg+uYIA3auS+ysx77AkXPdVgiSFSNgAEgQoSYg7eIwzkNGzYUgQIKCgrkf4GXFtiDlN8FqhmqavLakSBjG15HcY26UN6RLXTxx6+YIOWbGWNsI4x1nWKNj4+nJ598UsRg1VJ6erq4F5mVlRWAz7mySSZI+d2gGuGoJq8dCRLkCJK8+NPXlHd4Ew/e8s2MMbYRxroIcuXKlZScnHyF2P369WOCNNmZPHibBM5AMcbYAFhusmJ5FcusFw9toLyjW3nwtg6p4RpYjw1DZriA6SVW7RQrApbjSsd3330nrnh06NCBunXrRhUVFYaFkVGAPUgZqF5ep2qGqpq8dvQg46/uRTF129GFA2so//gOJkj5ZsYY2whjrx4kAgQgUACueRQXF1PdunXp2WefpVdffZXGjh0rXvqwQ2KClN8LqhGOavLakSATmvWl6NqtKXf/l1Tw87c8eMs3M8bYRhh7JUjIunjxYqpZsyZ9/PHHdPvtt18Sn5dYzfckD97msdNbkjHWi5T7fAnN+1N0zRaUu28FFZzYw4O3dUgN18B6bBgywwVML7GipaSkJLrjjjvoww8/FPcemzdvTp999hl99NFHHgVJSUmhc+fOuc2DekNCQjzmwalZnJjFsq6nxB6kYZ0wXEA1Q1VNXjt6kAgzh3BzOd8vo8JT+5ggDVuN9QKsx9Yx9FaDKYKMiYmhuLg4QYbwIt9++23RzvXXX09PPfUU3X///fT9999f0TZisz7zzDPibiQI7vnnn6fly38LU4V/A9lGRESIsseOHaMHH3yQpk+fLvKDEEeNGiX2N9Hu0KFDvb4cwgTpTQWs/66aoaomrx0JEoHKEbA8e/diKjpzgAnSuhkZroH12DBkhguYIkjEWO3cubPbxhAkICcn54rf16xZI2Kzvvvuu2Kf8tZbb6VevXpdyoenslq0aEFDhgwRQdAffvhh2rx5syBSkOuiRYtoypQp4p4llnbx794SE6Q3hKz/rpqhqiavHQkST11FpDai7O8+paKzPzJBWjcjwzWwHhuGzHABUwR51113EfYZsaSanZ1NeCRZSyCkadOmuRRk69atNHz4cNq7d68oD4LDqVctvfXWW+JOJe5XInTdO++8Q7Nnz6a1a9eKg0DwXPv27UsLFiygQYMGiTzeEhOkN4Ss/66aoaomry0Jsu2fCI8mZ+38hIrPH2GCtG5GhmtgPTYMmeECpghSawXLnRs3bnS5nOpKEsRuBbkeOnSI2rRpQzNnzqSOHTteyopIPDVq1KBHH31ULNdCOAQ/1x5mxr3LCRMm0KlTp6hZs2aUkZFBM2bMEJ4l0siRI8Uf5zRp0iTDwHABRoARcI/Anrx0yi2LpBaxpykhtIihYgSqLALz58+/4tt0nWLFUuiIESPEYR3HhOVQV5F01q9fTy+++CKtWLGCbrrpJkIsV3iSWkIcV3iFw4YNE/+EayRYZgWxIiUmJool2lmzZtHo0aOFp4o9z+7du7u9d8kepHy9VW0mq5q8dvQgUzrcTWEJNen89g+oJOcEe5DyzYwxthHGuggSdx0RMACHZxwDA+BVD1d7kHPnzhX/DmIE0RUWFgqCHTBgAG3YsEGQ4Q033CBiu1599dX0xhtviL1O7aQq9j6XLVtGtWvXpq5du9KYMWME2WIf091yKxOkfK1SjXBUk9eWBNnpPgqLS6NzW+dQ6YUzPHjLNzPG2EYYeyVIBCnfsmUL7dy50+Wypqtvadu2rVhWRSovLxcxW0+cOCE8RJAmovG89957VKdOHUG42Gt86aWXRH7sTU6ePFksu0ZGRhLc3tTUVHGadeLEiW6hY4KUr1WqEY5q8tqRIFM7D6fQmGp0dvM7VJZ35ZUtxpjtzhUCqumFpT1I7BlWr16dnn76acrMzLyEx/79+90uecLjbNKkCe3bt89tHpxQRX2OdxzT0tJE/Y7t6FFBJkg9KFnLU1WU3hoKckvbDeO0Lg9SSFQinf36LSoryGbvRm73u6zdbjqhBwLVZLZEkNgjxAEa54T9RaNEpgdcM3mYIM2gZqxMVVF6Y1/t39x2wzit6ygKiYyjzK/eoPKiC0yQ/lUH0ZrddEIPBKrJbIkgsfyJqxfO6fHHH6f8/Hw9eEnPwwQpHWLlDFU1I7XjYFi9+8MUHB5NZ9ZPp4qSK22dMWa7c4WAanphiSABQKtWrcTBmh9++EE8kqydOJWvHvpaYILUh5OVXFVF6a1gILus3TBO7/kXCgoNpzNrp1JFWTF7kLIVwEX9dtMJPRCoJrMlgsQ9yAceeEDgsnv3bmrUqJE4dIO7jnZJTJDye6KqKL18pMy3YDeM03s/RkHBIXR69RSqrLgyHrLd5NWDvGoyqyavHVdCvOmFJYJEZJzS0lJxxeLo0aPi0A3ehgRBHjx40FvbfvmdCVI+zKoZqmry2nFgqdH3CaFYp1ZNdqlgjDHb3e96iRV7j7j4j+g2tWrVEtc28IoHrlw89thj4l6jHRITpPxeUG0wVE1e2xFkUDDV6DOeqLKSTn356zUs58QYs91VBb2w5EEiUADuQ+I6BrzHsLAw8TfCw9klMUHK7wnVBkPV5LUbQQaFhFF6r3FUWV5Kp9e8ygQp38QYY5th7DVQAORFmDicWE1OTr4kPoKLIz6qXRITpPyeUI1wVJPXbgQZHBZF1XuMoYrSIjqz7jUevOWbGGNsM4x1ESRk7tKlixC9WrVqVFBQQKtWrQrQp7hulglSfneoRjiqyWs3ggyJiKW0bqPF9Q5c86gKS2l2w1iP1bIe60HJWh5LS6x4JaN3797iQWMcynn11VfFU1b33XefNal8WJoJ0odguqlKNUNVTV67Dd4hUQmU1mWkCBCAQAFMkPJtjDG2F8a6PEg8ZoxTrHh9Izc3l95++23xkDGfYjXfmTx4m8dOb0nGWC9SrvOFxiRTaucRIsQcQs3x4G0NT7OlWY/NIqe/nGkPEjFVcUjnyJEjIpIK0l//+lfx388999ylNxr1iyInJ3uQcnB1rFU1Q1VNXrt5kHjFI6XTfSJIOYKVM0HKtzHG2F4Y6/Igcc0D1z3Onz9PxcXF4rFjnGLF81Mcas5ch/LgbQ43I6UYYyNoXZk3LD6dUjreK565wnNXPHhbw9NsadZjs8jpb3AlpQAAIABJREFUL2fag0QTCAoArxERdBC0HI8kY5kV9yHtktiDlN8TqhmqavLazYMMT6xN1a69UzyUjAeTmSDl2xhjbC+MvXqQWGJFUACEmMNDyAkJCZSdfeWzN4H5rN9aZYKU3wOqEY5q8tqNICOS61FyuyFUknWczu/4mAlSvokxxjbD2CtBQl7sQQYFBVHXrl0ve7sxQN/islkmSPm9oRrhqCav7QgypSElt/kjFZ8/Qlk7P+HBW76JMcY2w1gXQS5ZskTsO2K/8cyZM1RZWSk+Y8SIEZSXlxegT7q8WSZI+d2gGuGoJq/dCDIyrTEltbqVis7+SNnffcqDt3wTY4xthrEuguQHk33fazx4+x5T5xoZY2sYR6U3pcQWA6jozAHK3r2YB29rcJouzXpsGjrdBS0d0sG+Iw7nOCc77UWyB6lbF0xnVM1QVZPXbh5kVM0MSmx+IxWe2ks53y9ngjRtOdYKsh5bw09PaUsEmZKSQjNnzqQ6derQjh07KCIigubNm0dr1qzR07Zf8jBByodZNUNVTV67EWR07VaU0KwfFZzYQ7n7VjBByjcxxthmGOtaYl2+fDmlpaUJ0XGatX79+hQbG0s9e/bke5AmO5QHb5PAGSjGGBsAy0XWmDptKb5pbyr4+VvK3f8lD97W4DRdmvXYNHS6C5r2ILVIOvDQoqKixHuQK1asoAkTJog/dvEi2YPUrQumM6pmqKrJazcPMqbetRTfpAflH/uGLhxcywRp2nKsFWQ9toafntKmCRLvQG7ZsoWOHz8u4rHiTUjsPXbq1IluvvlmOn36tJ72pedhgpQOsQgvOH/+fPkN+agF1eS1G0HGNuhEcVd1pbyjW+niIdcPozPGPlJWD9UwxoHDWNcSKwIENG3a9DIpT548SQMHDpQvuc4WmCB1AmUhm2qGqpq8diPIuEbXU2zDzpR3eBNd/Olr9iAt2I6VoqzHVtDTV9a0B4nqcYK1f//+NHjwYKpevTodPnyY/vGPf1BOTo6+1v2QiwlSPsiqGapq8tqOIBt3o9j6Henij19R3pEtTJDyTYwxthnGHj1IHMx56KGHRAQdLKW+/PLLtHPnzgB9gudmmSDld4tqhKOavHYjyPire1FM3XZi/xH7kK4SY8x2VxX0wpQH+c4771CLFi0ufX9hYaEgSzsmJkj5vaLaYKiavHYjyIRmfSm6dmu6sH815f/semLMGLPd/W4JEodzCgoK6J577qEXXnhBkOWQIUPEEqvdEhOk/B5RbTBUTV7bEeQ1/Sm6VgvK3fcFFZzYzR6kfBNjjG2GscclVpAO/owaNYruvPNOGj9+PD399NO0cuXKAH2G+2aZIOV3iWqEo5q8diPIxIw/UFSNa0QUHUTTqQqegt0w1mO1rMd6ULKWx9QSK0jn3LlzhAeTmzRpIjxI/NuxY8eENFOmTKGioiJrkvmoNBOkj4D0UI1qhqqavHYbvJNaDqTI6ldTzp4lVHh6PxOkfBNjjG2GsVcP0pO8N910E2VmZgboky5vlglSfjeoRjiqyWs7gmw9iCJTr6LsXZ9RUeYhHrzlmxhjbDOMPRLkuHHjKCQkxK3Ir732GpWUlATok5gg/Q28aoSjmrx2I8jktoMpoloDyvr2Eyo+d4QHb38b3P9vj/VYPvCmlljli+W7FtiD9B2W7mpSzVBVk9duBFmt/R0UnlSHzu/4mEqyjjNByjcxxthmGOuKpBMgmQ01ywRpCC5TmVUjHNXktR1BdribwhNq0vntH1BJzgkevE1ZjfVCrMfWMfRWA3uQ3hDy8++s9PIBZ4ytYZzScSiFxVenc1vfp9ILrmMuM8bWMNZTmjHWg5K1PEyQ1vDzeWlWep9DekWFjLE1jFOvG0ahsSl0bvO7VJp3lj1Ia3CaLs16bBo63QWZIHVD5Z+MrPTycWaMrWGcev0DFBqdRGc3zaay/CwmSGtwmi7NemwaOt0FmSB1Q+WfjKz08nFmjK1hnNb1zxQSGU+ZG9+k8sJcJkhrcJouzXpsGjrdBZkgdUPln4ys9PJxZoytYVy9+0MUHB5DZzZMp4rifCZIa3CaLs16bBo63QWZIHVD5Z+MrPTycWaMrWFcvedYCg6NoDPrXqOKUtcRsxhjaxjrKc0Y60HJWh4mSGv4+bw0K73PIb2iQsbYGsbpvR+loOBQOr3mVaosL2UP0hqcpkuzHpuGTnfBgBBkSkqKiOXqKdWqVUs8vJyf73oJJzY2VsR7LSsr81gP34PUrQumM6pmqKrJi46xk8w1+j4hdOXUly8TVVYwQZq2HGsF7aQTer9ENZn9SpD9+/enZ555hvLy8ggE9/zzz9Py5cuvwLZly5Y0e/ZsevHFF2nhwoU0ffp0kR+EiBdEKioqaPHixTR06FBBop4SE6Re1TWfr6oovXkE5Je0DcZBwVSjz/hfCXLVZLcfbht5DXSNajKrJq/dJnp6VMOvBLlmzRqaM2cOvfvuuzR27Fi69dZbqVevXpfJGR0dTZ9++inFxcXRSy+9REeOHBFECXJdtGiReCmkYcOGVLNmTUGw3hITpDeErP+umqGqJq+dBpagkDBK7zWOKivK6PTqKUyQ1s3HdA2sx6ah013QrwS5detWGj58OO3du5f69esnCK5Dhw6XCTtv3jzxjNbtt99Or7/+uvAg165dS8XFxRQTE0N9+/alBQsW0KBBg3QFRGeC1K0LpjOqZqiqyWsnggwOi6TqPR6hyrJiOr12KhOkaauxXpD12DqG3mrwK0Fu376d7rrrLjp06BC1adOGZs6cSR07drwkI7zKPn360MCBA2n16tWXCDI0NFR4kHiQecKECXTq1Clq1qwZZWRk0IwZM4RniTRy5EjxxzlNmjTJGw78OyPACOhAoKQihLZdqENhQeXUMeFnHSU4CyOgNgLz58+/4gOkBCuHZ4jl0hUrVhDejMSzWfAktbRhwwYKDw+n0tJSioqKEn8/99xzl/YpExMTxRLtrFmzaPTo0TRt2jR66qmnqHv37mJf0lViD1K+cqo2k1VNXjt5kAgQgEAB5UUXKfOrmexByjcvxtiGGEshyLlz54pDNSBGEF1hYSGNGDGCBgwYQCDH2rVri2VUpMmTJ9PSpUvpzTffpNzcX6N1TJ06lZYtWybyde3alcaMGSPIFvuY7t6fZIKUr12qEY5q8tqJIBFiDqHmygqy6ezXb/HgLd+8GGMbYiyFINu2bSuWVZHKy8tp8ODBdOLECcLSK0jz66+/vgQFllORF3uQSPXr1xekicEtMjKS4PampqaK06wTJ050CyETpHztUo1wVJPXVgT5/9o7E/Asquv/H0ISQggkQBJ2QUQERFkFpCKKgqilgvqvFtdqVURr+Relpe1jW2qx1iqIu1bFfUFwR1xY3dgEkUUUNxQkhBBCIISs/J7vtRNfQt537ix33nfCd56HB0jOuffOZ87Md86dmXMzciTnxMulcm+B7PjoMV68zZ9eZJyAjI0IJPYTU6hdu3aVDRs2RJ0WrYtHbm6u+nF+fr4jXBRIR7hcGYdNcMI23kQSyJRmrSV74CVSsSdfCpY+zou3qzPGHyfGsT8cY7US6Es65nfn0B4okOaph+1EDdt4E0kgU7PaScsTxkrF7h+kYPnTFEjzpxcZJyBjYxlk0PtKgTRPPGyCE7bxJpRAtjhCWva7QMp3fS87Vz7Hi7f504uME5AxBTJOB4UXb/Pgydgd4wbJqZLevrc0O3qolO38RgpXvciLtzuUvngxjn3BGLMRTrGaZ+yoBwa9I1yujMnYFTZp1LKTtOj7/5Tz/h1fya5PfnyBrq6NjN0xduJFxk5oubOlQLrjZsyLQW8MbU3DZOyOceO2PSXr2DOlurxE9m1ZI3u++umt89otkrE7xk68yNgJLXe2FEh33Ix5MeiNoaVAekSbceQgadpliJRsXinFXyyM2Rrj2CNsDXcy1oDk0YQC6RGg3+4Mer+JHtoeGbtj3KzbadKkQ18p/mKRlGxeQYF0h9E3L8axbyijNkSBNM/YUQ8Meke4XBmTsSts0rzXOZKW21WK1r4upXmfUSDdYfTNi3HsG0oKpHmU/vTAoPeHY6xWyNgd4+wBF0lKZlv1eQc+8yBjdxz98mIc+0UyejvMIM0zdtQDg94RLlfGZOwKmypSjmLlqMGKWqwUSHcc/fJiHPtFkgJpnqRPPTDofQIZoxkydse4zekTRRokSd6C6XKgqoIC6Q6jb16MY99QcorVPEp/emDQ+8OR2Y2/HJNSGkurU66XA5XlkrfwLtvGGce2iDwbkLFnhLYNcIrVFlGwBgx687zJ2DnjlIwcycYqHiWFsuPDR2wbIGNbRJ4NyNgzQtsGKJC2iII1YNCb503Gzhk3anmktOh7vpQXfic7P37etgEytkXk2YCMPSO0bYACaYsoWAMGvXneZOyccXq74ySzx0gp3bZBita9YdsAGdsi8mxAxp4R2jZAgbRFFKwBg948bzJ2zjij84nS9KiTpOTb5VK8abFtA2Rsi8izARl7RmjbAAXSFlGwBgx687zJ2DnjzG6nS3qHPlL8+QIp+e5j2wbI2BaRZwMy9ozQtgEKpC2iYA0Y9OZ5k7Fzxs17jZa03KOlaO1rUpq30bYBMrZF5NmAjD0jtG2AAmmLKFgDBr153mTsnHH2gIslJbON7FzxrJQXbbFtgIxtEXk2IGPPCG0boEDaIgrWgEFvnjcZO2ecO2ScNExrKvnvPyxVpUW2DZCxLSLPBmTsGaFtAxRIW0TBGjDozfMmY+eM2wy/STlte/cOkQPVtg2QsS0izwZk7BmhbQMUSFtEwRow6M3zJmNnjJNS06XV0OukurJMti+coeVMxlqYPBmRsSd8Ws4USC1MwRkx6M2zJmNnjFOa5kr2oMuksmSn7PjwUS1nMtbC5MmIjD3h03KmQGphCs6IQW+eNRk7Y9wou7O06HOelBVulsKPX9ByJmMtTJ6MyNgTPi1nCqQWpuCMGPTmWZOxM8bp7Y6XzB5nSOm29VK0bq6WMxlrYfJkRMae8Gk5UyC1MAVnxKA3z5qMnTHO6DxYmh71M9n77TLZs2mJljMZa2HyZETGnvBpOVMgtTAFZ8SgN8+ajJ0xzuw+XNLb95biz+dLyXertJzJWAuTJyMy9oRPy5kCqYUpOCMGvXnWZOyMcfPeYyQtp4vs+vRV2b/9cy1nMtbC5MmIjD3h03KmQGphCs6IQW+eNRk7Y5w98BJJadZadq54RsqLtmo5k7EWJk9GZOwJn5YzBVILU3BGDHrzrMnYGePck6+Vho0yJP/9h6SqdLeWMxlrYfJkRMae8Gk5UyC1MAVnxKA3z5qMnTF2WkUHrZOxM8ZurMnYDTVnPhRIZ7yMWzPojSPmxdsB4qRGTaTVyeOlumK/bF90t7Yn41gblWtDMnaNTtuRAqmNKhhDBr15zmSszzilWSvJHnipVO4tkB0fPabtSMbaqFwbkrFrdNqOFEhtVMEYMujNcyZjfcaNco6SFr3PlbKd30rhqlnajmSsjcq1IRm7RqftSIHURhWMIYPePGcy1mec3r6XZHYfIaU/rJOi9W9qO5KxNirXhmTsGp22IwVSG1Uwhgx685zJWJ8xKuigks7eb5bKni/f03YkY21Urg3J2DU6bUcKpDaqYAwZ9OY5k7E+Y9RgRS3W4o3zpeR7vSo6aJ2M9Rm7tSRjt+T0/SiQ+qwCsWTQm8dMxvqM8fwRzyF3rXlF9ud/oe1IxtqoXBuSsWt02o4USG1UwRgy6M1zJmN9xniDFW+yFix/Wip2/6DtSMbaqFwbkrFrdNqOFEhtVMEYMujNcyZjfcatho6XpNQmkv/eg1K1v1jbkYy1Ubk2JGPX6LQdKZDaqIIxZNCb50zG+ozdVNFB62Ssz9itJRm7JafvFxeBzM7OloKCgqijbNu2reTn50tlZWVUm4yMDNm/f39MGzivXLlS+vfvr08kzpYMevMHgIz1GKP+KuqwVpfvk+2L79Vz+p8VGTvC5cqYjF1hc+QUqECOHDlSbr75Ztm7d69A4G655RaZO/enFcp79uwp9913nxK+6upq+frrr2X8+PHqZ5Ygjhs3Tv3u1VdflUsvvVSKiopi7jAF0lE8uDIO24katvHGKyPDCh5YyaNi7w4p+Gimo9ggY0e4XBmTsStsjpwCFcgFCxbIE088ITNnzpQbbrhBRo8eLcOGDasZMIQwJSVFrrrqKiWIixYtkilTpiiRhLi+8sorMm3aNOncubMgy4TA2m0USDtC3n8fthM1bOONl0BiDUisBVm28xspXPWio0AhY0e4XBmTsStsjpwCFchly5bJFVdcIevXr5cRI0YogRswYEDNgJs1a6ayQ2SYEEVkiIMHD5b58+dLWVmZNGnSRIYPHy6zZ8+WMWPGSHl5ue3OUiBtEXk2CNuJGrbxxksg09v3lszuw2Xf1rWye8M8R3FCxo5wuTImY1fYHDkFKpArVqyQsWPHyqZNm6RPnz7ywAMPyMCBAw8acHp6usoS+/btq/5+5plnJDk5WWWQb7/9tkyaNEm2bdsm3bt3F0zJ3n///SqzxHb11VerP7W32267zREUGpMACYhs3p8l3+/Pkg5pRdIxLfajDPIigfpKYNasQ2sQN+jXr98Bv3d48eLFcuutt8q8efPkrLPOkgkTJqhM0towrTpnzhypqqpSU7AQ0sgtKytLTdE+/PDDcu2118o999wjkydPlqFDh6rMs66NGaTfR/HQ9sJ2Jxu28cYrg8zsMVLS2x0nuz97R/Zt+cRRIJGxI1yujMnYFTZHToFmkE899ZR6qQbCCKErLS2VK6+8UkaNGiVLliyRiRMnyqBBgw4Szci9mTFjhrzxxhvSvn17GTJkiFx//fVKbPEcM9p0KwXSUTy4Mg7biRq28cZLIFv0OU8aZXeWXZ+8JPt3fOkoNsjYES5XxmTsCpsjp0AFEtOmmFbFhizx/PPPl61btwqmXiGaN954o3To0OGgHcCzyOXLl0unTp3k9ttvV99XpaWlCdLenJwc9Tbr1KlTo+40BdJRPLgyDtuJGrbxxksgswddJilNc6Vg2ZNSUZznKDbI2BEuV8Zk7AqbI6dABRIjS01Nla5du8qGDRuiTovWtQe5ubnqx/g+0slGgXRCy51t2E7UsI03XgLZauh1kpSaLvnvPSBV+/c4Cg4ydoTLlTEZu8LmyClwgXQ0Oh+MKZA+QLRpImwnatjGGxeBbJAkbU6fqI78tndudxxEZOwYmWMHMnaMzLEDBdIxMrMODHqzfOMiNj7sUtBx0TCtqeQOGSfV5SWyffF9jvcg6PE6HmAdDmEbc9jGG8ZzjwLpx5nlYxsMeh9hRmmKjO0Zp2S2kewBF0vFnnwpWPq4vUMtCzJ2jMyxAxk7RubYgQLpGJlZBwa9Wb5hvIuNx5jTco+W5r1GS1nB11K4erbjg8I4dozMsQMZO0bm2IEC6RiZWQcGvVm+8RAbP/Yo6LhI79BHMrudLvu2fiq7N7zleBeCHq/jAXKK1Q9kjtsIW1xQIB0fYrMOYQugMAoOGdvHsFVmbs9XH8jerz+0d+AUq2NGXh0Yx14J2vtTIO0ZBWrBoDePm4ztGWcdN0oat+4mRevmSum29fYOFEjHjLw6MI69ErT3p0DaMwrUgkFvHjcZ2zPOGXylJDdpoZa5wnJXTjcydkrMuT0ZO2fm1IMC6ZSYYXsGvWHAXO3eFnCDpIbSetj/lwMHqiVvwTSRA87LMjOObTF7NiBjzwhtG6BA2iIK1oBBb543Gcdm7PUTD7ROxozjugiELS4okObj2FEPYQugMF4MyTh2SKa37yWZ3UeoZ494BulmI2M31Jz5kLEzXm6sKZBuqBn0YdAbhPu/psk4NmN83oHPPIq/WCglm1e6OiBk7AqbIycydoTLlTEF0hU2c04MenNsrZbJODbjlieMldSsdlL48QtSVrjZ1QEhY1fYHDmRsSNcrowpkK6wmXNi0JtjS4HUY9t62ARp0DBFti+cIdWVZXpOtawYx66wOXIiY0e4XBlTIF1hM+fEoDfHlgJpz7Zh4yzJPekqqSrbK/lL7rd3iGLBOHaNTtuRjLVRuTakQLpGZ8aRQW+Ga2SrZBydcVpuV2ne6xwpK/hGCle/6PpgkLFrdNqOZKyNyrUhBdI1OjOODHozXCmQelybHnWSZHQ+UfZ+u0z2bFqi51SHFePYNTptRzLWRuXakALpGp0ZRwa9Ga4USD2uzXuPkbScLlK09jUpzduo50SBdM3JiyOvFV7o6flSIPU4BWbFoDePmoyjM84dco00TGsmOz58VCpLdro+GGTsGp22Ixlro3JtSIF0jc6MI4PeDFdmkPZck5IbSatTb5AD1VWuS8xZvTCO7Xl7tSBjrwTt/SmQ9owCtWDQm8dNxnUzTm3eQVr2v1AqivOkYNmTng4EGXvCp+VMxlqYPBlRID3h89+ZQe8/09otknHdjJsc0U+aHTNM9m1dK7s3zPN0IMjYEz4tZzLWwuTJiALpCZ//zgx6/5lSIPWYZh47UtLbHifFG+dLyfer9JyiWDGOPeHTciZjLUyejCiQnvD578yg958pBVKPafagyySlaa7sXPmslO/aoudEgfTEyYszrxVe6On5UiD1OAVmxaA3j5qM62DcoIFaAxJrQeYtmC4Hqio8HQgy9oRPy5mMtTB5MqJAesLnvzOD3n+mzCDtmaY0ay3ZAy+RqtLdkv/+Q/YONhaMY88IbRsgY1tEng0okJ4R+tsAg95fnnW1RsaHUsnscYaktzte9n2/WnZvfNfzQSBjzwhtGyBjW0SeDSiQnhH62wCD3l+eFEh7nli5o9XQ69QKHjs+mimVe3fYOzGD9MzIawO8VnglaO9PgbRnFKgFg948bjI+mDEyR2SQFbu3ScHyp3w5AGTsC8aYjZBx/Bg36Nev3wHz3ZvvYeXKldK/f3/zHfnUA4PeJ5AxmiHjg+FkD7hYUjLbqG8f8Q2kHxsZ+0ExdhtkHD/GFEjz7OvsgUFvHjwZ/8Q4OSNHck68XL21un3xvZ7fXrVaJmPGcV0EwhYXnGI1H8eOeghbAGHnwjbmsI3XJONm3U6TJh36yr4tn8juz95xFKuxjMnYN5RRGyLj+DFmBmmePTNIMtYmYOpiiOLkKFJesPRxqdiTrz0eO0NT47Xr18vvwzbmsI3X5I2el+Pu5kaPAmmKuE27DHrz4Mn4R8aN2/SQrJ5nS0XxdilY9oSv4MnYV5y8mTaP0xFjCmSCHZA4DUer27BdDMM2XlN33i1PGCupWe3Ud4/4/tHPjYz9pFl3W2QcP8YUSPPsHd2xxGk4Wt2G7UQN23j9FsgGyamS0WmgZBw5SB3fvIUz5EBlmdax1jU63BnrcvJiR8Ze6On58iUdPU6BWTHozaM+3BlbmSNIl/6wTorWv+k79MOdse9A62iQjM1TpkCaZ+yoBwa9I1yujA9nxs17j5G0nC5yoLJcSr5bKaV5G6WyZKcrjrGcDmfGvsOM0iAZmydNgTTP2FEPDHpHuFwZH66MsRgyFkXG5seSVhRIV+Hnm9PhGse+AdRoiAKpASlIEwa9edqHG+PkJi0lq+dZghU7sO1a87Lsz99kFPThxtgoTGaQ8cCr+qRAxg193R3zwmL+gBxOjPEiTtMuQxTU6vJ9Urh6tlQU5xmHfDgxNg6TAhkvxPERyOzsbCkoKIi6082bN5fdu3dLdXV1VJuMjAzZv3+/VFZWxoTHWqzmYytsF8OwjTfWnWy0o5uSkSMZXU5Szxux4WWc4i8WSXVFqfmACGF1JTeMAwEZo5PDIY4TlbGRzzxGjhwpN998s+zdu1cgcLfccovMnTu3hsERRxwhM2fOVMKYnp6ufgeb++67T9lDEMeNG6d+/+qrr8qll14qRUVFFMg4R1HYTtSwjVfn4t2oRUdVcDy5SQtp1LKTJKU2+TFrrCyTXatnS3nR1kCjpD4yDhSgRmdkrAHJo0mgU6wLFiyQJ554QongDTfcIKNHj5Zhw4bV7MLdd98tTZo0kSuuuEKOOuooef7552XChAnyl7/8RSCur7zyikybNk06d+4sbdu2VeJptzGDtCPk/fdhO1HDMt6UprnSILmROkCnnHKKvL98jSSlpkvD9CxJSk6TpJQ0Sc7IlqSUxoccREyj7tuyRkq3f+77N446ERMWxpH7ErYxh228Ojd6OrEVpE2gArls2TIlfuvXr5cRI0YogRswYEDN/s6ZM0c++ugjuf322yUpKUmWL18uV111ldx5551SVlamxHP48OEye/ZsGTNmjJSXl9uyMimQVqku20HQgAQMEsAnGxV7tktV6W4pK9ws5YXfSVXZXoM92jfNi7c9I68WZOyVoL1/oAK5YsUKGTt2rGzatEn69OkjDzzwgAwcOLBmlJhSfeutt+Suu+5SP7MEEoKKDPLtt9+WSZMmybZt26R79+7Ss2dPuf/++1Vmie3qq69WfyI3iGhqaqo9CRcWC1dvljtfWOrCM7ZLanLDHw0a/GRn/bNBg4gf/u/X1o8aRDjUNov0i/k7q8sIo5/6toZ16MCcjs8aauTeWGOsYxel5nc144tk82MrWn4H2R3qVzOeWPuveQxq72OsY/fj4Y6+H/h986aNpU3LDGnZ7Me/kxsmSafWWdI03Ux8+x7YbJAEQkYgLy9Pfv7znx8yaiPPIBcvXiy33nqrzJs3T8466yw1fYpM0togmHg+eeONN0pycrLKJiGg1ss6WVlZaor24YcflmuvvVbuuecemTx5sgwdOjTqCz0mM0gTxzps4wWDsI05bOMlYxNn2qFthi0uwjbe+hTHRgTyqaeeUi/VQBghdKWlpXLllVfKqFGjZMmSJSq7REqL55KwOffcc+Xkk0+uieQZM2bIG2+8Ie3bt5chQ4bI9ddfr8QW9tGmW8MWRGEbb30K+mAuw+56CVtchG28jGN3cenUK2xxEW28RgSyb9++aloVW1U2piFTAAAPCUlEQVRVlZx//vmydetWwdQrBHHt2rVquhTPGjEdNWXKFHnttdeUfadOndSzSQhoWlqazJo1S3JyctTbrFOnTo16nOrLAXEaiEHak7F52mRMxrUJhC0m6tNNiBGBBCA8D+zatats2LAh6rToscceK1999ZX6rMPacnNz1T/z850t6hq2IArbeOtT0Ju/BLvvIWxxEbbxMo7dx6YTz7DFRaAZpBOQftnipZ2HHnrIr+aMtxO28QJI2MYctvGSsfHTTnUQtrgI23jrE2NjGWQwoc5eSIAESIAESMAMAQqkGa5slQRIgARIIOQEDhuBzMzMVIcKtV8TYYs2Hrv6tfEcO8oAoiZu5DPjeI5Hp2880679PDuRGevsU6LakKuZI4P3ORo3bnzQtQsFVnAN2bVrl5lOPbbaokUL9SVDrDrbHrvQdgc/jAffOkZuOrXA671AWm/CIphwwPDW7K9+9Ss5cOCALFq0SHbs2KGYoSgBPkUxvUUbz0knnRSzfq3pccVqH4GEz20mTpyo3irGd6nYUFIQn91YJwHePC4pKYnnUGv67t+/v5x99tlyxhlnyODBg9XP7WoEJ8TARVRNYhTHwLfC2P7zn/8o1om6hYVrJL9Ejl1rnPhGHKUHcV3C9Qqfx2FDnWsw37dvn6pEhqpl3377bUKEB8qD4lr229/+Vi644AL5+uuv1dcK8bjWAgiqs5144onqOt+oUSP529/+pr6oqKsWeF0A671A/vKXv1TfUVrfWS5cuFB9YoLqPaj9ijqxOqXs/Iq+aOP5xS9+EbN+rV/9u2nn9NNPl1//+teqbi6+cbUEcunSpUp8cJLarbbipl8vPqj3i0+GWrZsWSOQdjWCvfTnpy8+ebrjjjvkww8/DDQ2dffBWkjAeikuDFxxzp9zzjk1u5iIsYubOizMgPrV2JD1II7xPTgu6hBIZEOICxRgwczIs88+Kzt37lTXuHhstbli7Cj6cswxxwiudRBIXCOCutZGjgcLYeC7e5QtRaaNG08whVjWrgV+2mmn1Tm7WO8FEgGFqcHCwkI5/vjj5ZFHHlEXexw01H9FRolCBihl98wzzxiPsWjjwbhi1a81PjCNDt58801VwAECiSLy+DYVd7bIIOfPny9/+tOfNFoJzgSFJVAH2Mog7WoEBzey2D3hAojsAbH5xRdfyDXXXFOTTSbCGCHeOOY33XSTGk4YuOIb7BNOOEGNN1FjF6L3u9/9Ts16RG6oStaxY0clkL1791bfmA8aNEiZ/Pvf/5YuXbqoYivx2CK5RvaPzyYsgcRbuEFda2uPB8f6hx9+UNPRuPHENQvXg7pqga9evfoQhPVGICGCl1xyySE7iEo+mPbD9OCFF14o7733nvz+97+Xiy++WHDHhukKBCWC82c/+5lvc+ZOx2NXvzaI4Ieg4M4vcvv8889rpvciBbJHjx7qrhDlAvHvf/3rX4r/Z599FsRQVR8Ya+QqMfgZnjFbNzq1BTIRGFtwYsUHbpZww4YT+9FHH1VF+6dPnx4Y12gdWfFhlY1EzWTEx2233Raz9nK8Bh7JGNOU4GoJOuI2nrEbycSKY/yNa9LTTz99UBxHCiSOwT//+U81bYgNN6XwCVIgo3G1rrUYV6RABnmtjTzO1nhww/HXv/5VZdp4vPbCCy/UWQt8zZo19VcgW7duXXMCRO4lskXAQdH0v//97woMNqTfmFrF1CDu1jHlgukNFDbwY3M6Hrv6tX6Mya4N3CxEFpWHPbj84x//UK6RAolMGFOr1gs7uFjiD56XBbVhyqx20XpMpeCExFZbIBOBscUmWnzgBN+zZ0/Ns1xka1g/Fc93471Z8YFn0tjAGvGBKfhYtZfjNe5Ixq1atZLt27eroSBmcLGMZ+xGMrHiGC/iQHwwBRgZx5EC2axZM3XDitWRkMUjPrAhAQhqi8YV11rrhbhIgQzyWht5nDEe3Mxh2hdLKlqs7GqBR3KsNxlktODAA2M8qEWl9si3GVEIHWk3pgGwpNYf/vCHmmkLk4EWbTzR6teaHIvTtiMFEtkiCslDhPCc76WXXpLLLrss0AzSbvy1BTIMjDElhOcouIgjO3v55ZfVTZ11ctvtcxC/rz3FGgaukVNviRq7OlOsOL6YgscxwGzYiy++qBKDxx9/PIhDf0gfOlOsQV5rI8eDG3hM/yMxev3112vGjutWrFrgh5VAWg+HI3ca89B4uG0FFe7EkI4HUYkn2nhwAOuqXxuXqI/SKQQS47z33ntV9oj/444Wz8rw0pP1ckGijLm2QEarEZwo47XGgWL91vqpeDkDzyALCgoSZpi1BTIMXGtfOBMxdmMJJGYRLrroIhUDuMDjfQVsyIrxgl+8PqeIJZCowY23a48++ujArrWR48HUs3VNtU4enE+4QYpWC7z2SVbvM0i7qwrezEQ92ETYdOrXJsI4I8fQoUMHdZIG+SawFwZhYYwl3zD1n0jCGIt7WLiGOXYjx474wCIOWHM3LFsiXWvBrK5a4BTIsEQTx0kCJEACJBBXAod9BhlX+uycBEiABEggYQlQIBP20HBgJEACJEAC8SRAgYwnffZNAiRAAiSQsAQokAl7aDgwEiABEiCBeBKgQMaTPvsmARKokwA+H8JWXFxMQiQQNwIUyLihZ8dBE8BH1ShgXnvDN2T//e9/1cf5KA7t90fX7dq1U99dffLJJ/Kb3/xGe7cfe+wxOe6441Qhi++///4gP3wk3rBhw5o6s9qN2hiiagtWvUH5uGib2/1xMkZ8DI/va62SanX5xhoHljnDyg2oEYxSfdxIwA0BCqQbavQJJYFJkyapkoMolYa1C1FqDMKDZaVWrVqllhVCxX/UP/Vzw7eiqDS0du1aVShfd8OSPFj2CnU2v/vuu4Pc3n//fSWQsQREt59IO1QeQe3i2jVuI23c7o+T8aAIN/YvVgm1WOMAN/BDUWosvcSNBNwQoEC6oUafUBOA4KDI88cff6yq1GBDdRIIJAqEI/vAhqLGKFOIyjZYRQEChxVhTj31VLWSxZlnnqnWmEP9XrSDur4Q2F69eqnsB0v9oC4sMh0IJDIz2OPP+vXrVX/IXlHrFkvyoDAA1vibOnWqzJs3T13gLYGEmKNkV5s2bdTKNBB5+NYWSFSIqaioUG2hBCCy1qqqKunXr58SPuwnStihD6w1aBUjmDBhgirij+ojWKEFKxsgo7bsUDkJVVGwrBJqhsbaHys4UJsXY8TahcjQUYwbYoXVJ7BAwJNPPqn6Qd+otoJVddAfSuuh2hUEEmt64njAHzVAUacUbMH6uuuuizqOd999V1V5wr6jH2uJtlAHLgcfOAEKZODI2WG8CcQSSFxQUdUfmSYuxFgaB2UIcVHHBgHFaiFYCQLZJ8QIi8RiSnDdunVKVFDuCoICUXruuedUoWQICrZPP/1UunXrptb1Q/FviCYEGOIA0YQPLv4QvkiBhLBAmCGO2LCuHfquLZAoHg7R27x5s1pHECKDKkcofo1CzhB5FJTHNDJWPkGJQIjzl19+qcoIQrjQP8oJQvhRfhFMII6ohIKpXdw0RNufuXPn1hzeOXPmqELrKIWGac6UlBTFA+1g6anLL79cTWljbT5krvgZxosC01iayMqQp02bphbsxs0D9hk1lHHDgJUZoo0DGTBuAFCJCKKLdQG5kYBTAhRIp8RoH3oCsQQSF3gIFy68mMJD9oPsAwKJZYbwc1zk8X8IIjIkZJUQOGScKNiOn23btk3eeecdlfmgJBj88LNRo0ap7BVjQLYJUcAfZG+4iKMvrPUHG6z/Z2WQEDQICWq0IpOCmEUTSPQPYYeQYaFwZKiYasS+YaoWwoTMFjcBaAtZKcQHtpFTrKi5OnToUBk/frwSUtT/xHQ02oq2P5HT06jNixVyUPcYYgah/uabb1SfyEIx3YylndA31r1EFol9BBf0ZQkkuOCGAuOD+OP/kQJZF1eMl1OsoT9V474DFMi4HwIOIGgCsQTSegYJsUNmiALSyPAgiBBLZCRYbgjTdxBIZFfYkNn9+c9/VpklMhdkeNgiMy7rGSSeq0EwsOYjbLt3766mW5G54uUYrG6OZ3Do2xLIWbNmqb6sxZ8hAPh/XRkkhAhighdukB1CbLH8EKY8IZAQMbzAApFBxgkRhMDXFkiIF6ZFMVbU/MR6qWg7Ly/voGeqkftjrbmIfbcWJoYAQhDBDZkgxA7PfDGVOnnyZCWQkTVFkbHjZsQSSGTn2LDv8P3ggw+krKysJoOsiyuEngIZ9JlV//qjQNa/Y8o9siHgViAhNpiqhAjgOSNelMHzPGR6mDrFlCwyPLx9iqlYCCayG2RgkS/pRAoKlmDDeqWwg2BgxQYIA5ZFw/SmJZBYsgdvtCJ7gtgg64yWQdoJJMaK5d8w7QlxwuoGkRkkhBdjRt94PogxLly4UC644AKV6WFpuGj7EymQOAxYgxNZIW4sMG2LfcWG/YFI4u1e7AemfbEEEZ7XInsEQ0sgcSOB/cUajniOimnbyAwylkBiGhyZbO23gHmSkIAOAQqkDiXa1CsClkAiqxo3bpzaN+slHSuDxHNDPCuLzCAtgcQbsA8++KB07NhR+UKQ8OwRF3oIRFpamvo5LuIQSbysA0GBMOHij+lUiCou/BAC9HXkkUfWMMb6itOnT1e/sz7zwAs36BOigSlU/MHLNLEySDx7w/M8ZJB4LoopX2SQ6B99oi2MGcKOZ6bYPzwT7NGjhxI0cMJ6lFY2jP3E9CyyzWj7U1sgreeHEHZMXS9atKhm3HjWCf6Y7rWYwQ7jw80HxoSsEZ/mTJkyRWXaWEy6adOmii0YRhsHXsxBX8g48cLOH//4x3oVw9yZYAhQIIPhzF7qIQEsOYRpxI0bNx60Hh+eSeLlm9qfZsRCgDc1IQSYtkU2V9eGiz1e8EF/XpcXg/hYbdVeSxCr2uNZH/YBG24UkAVaz1xNHEpkq1u2bKnpM7IPZIC4gcD0M8Qb0894Cem8886LORTsI7ii2EA0pib2hW3WHwIUyPpzLLknJFAvCSBrxeclkRtEEs+JuZGASQIUSJN02TYJkIAvBPBGMaaLMS2LF58wbc2NBEwToECaJsz2SYAESIAEQkmAAhnKw8ZBkwAJkAAJmCZAgTRNmO2TAAmQAAmEksD/AX6iwalawI8sAAAAAElFTkSuQmCC", - "text/plain": [ - "\n", - "\n", - "If you see this message, it means the renderer has not been properly enabled\n", - "for the frontend that you are using. For more information, see\n", - "https://altair-viz.github.io/user_guide/troubleshooting.html\n" - ] - }, - "execution_count": 56, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker.unlinkables_chart()" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "f5f7513e-e15e-4978-a9a1-8829a3c071eb", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - " -- WARNING --\n", - "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. To produce predictions the following untrained trained parameters will use default values.\n", - "Comparison: 'comp_num_clean':\n", - " m values not fully trained\n", - "Comparison: 'postcode_area':\n", - " m values not fully trained\n", - "Comparison: 'postcode_area':\n", - " u values not fully trained\n" - ] - } - ], - "source": [ - "predictions = linker.predict(threshold_match_probability=0.1)" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "1b7207f0-50e9-4a1c-9d1f-35020086545e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
match_weightmatch_probabilitysource_dataset_lsource_dataset_runique_id_lunique_id_rcomp_num_clean_lcomp_num_clean_rgamma_comp_num_cleantf_comp_num_clean_ltf_comp_num_clean_rbf_comp_num_cleanbf_tf_adj_comp_num_cleanname_unusual_tokens_lname_unusual_tokens_rgamma_name_unusual_tokenstf_name_unusual_tokens_ltf_name_unusual_tokens_rbf_name_unusual_tokensbf_tf_adj_name_unusual_tokenspostcode_area_lpostcode_area_rgamma_postcode_areabf_postcode_areaname_unusual_tokens_first5_lname_unusual_tokens_first5_rmatch_key
1577020.3032440.999999companies_housedatahub1908008953265575376557537623.675612e-073.675612e-074.948061e+060.549839ietgietg33.618927e-073.618927e-079.073085e+060.27795LSLS11.0ietgietg0
87400-1.6572300.240726companies_housedatahub752630344416315791NaN-11.837806e-07NaN1.000000e+001.000000medinetmedinet35.428390e-075.428390e-079.073085e+060.18530NNaN-11.0medinmedin1
6497820.3032440.999999companies_housedatahub46093961021363534846353484623.675612e-073.675612e-074.948061e+060.549839lakeinvestlakeinvest33.618927e-073.618927e-079.073085e+060.27795LALA11.0lakeilakei0
7717620.3032440.999999companies_housedatahub560761133888408924089223.675612e-073.675612e-074.948061e+060.549839alloys irelandalloys ireland33.618927e-073.618927e-079.073085e+060.27795MLML11.0alloyalloy0
2155020.3032440.999999companies_housedatahub307789444229899234989923423.675612e-073.675612e-074.948061e+060.549839alliance cyber defencealliance cyber defence33.618927e-073.618927e-079.073085e+060.27795EE11.0alliaallia0
\n", - "
" - ], - "text/plain": [ - " match_weight match_probability source_dataset_l source_dataset_r \\\n", - "15770 20.303244 0.999999 companies_house datahub \n", - "87400 -1.657230 0.240726 companies_house datahub \n", - "64978 20.303244 0.999999 companies_house datahub \n", - "77176 20.303244 0.999999 companies_house datahub \n", - "21550 20.303244 0.999999 companies_house datahub \n", - "\n", - " unique_id_l unique_id_r comp_num_clean_l comp_num_clean_r \\\n", - "15770 1908008 95326 5575376 5575376 \n", - "87400 752630 34441 6315791 NaN \n", - "64978 4609396 102136 3534846 3534846 \n", - "77176 560761 133888 40892 40892 \n", - "21550 307789 44422 9899234 9899234 \n", - "\n", - " gamma_comp_num_clean tf_comp_num_clean_l tf_comp_num_clean_r \\\n", - "15770 2 3.675612e-07 3.675612e-07 \n", - "87400 -1 1.837806e-07 NaN \n", - "64978 2 3.675612e-07 3.675612e-07 \n", - "77176 2 3.675612e-07 3.675612e-07 \n", - "21550 2 3.675612e-07 3.675612e-07 \n", - "\n", - " bf_comp_num_clean bf_tf_adj_comp_num_clean name_unusual_tokens_l \\\n", - "15770 4.948061e+06 0.549839 ietg \n", - "87400 1.000000e+00 1.000000 medinet \n", - "64978 4.948061e+06 0.549839 lakeinvest \n", - "77176 4.948061e+06 0.549839 alloys ireland \n", - "21550 4.948061e+06 0.549839 alliance cyber defence \n", - "\n", - " name_unusual_tokens_r gamma_name_unusual_tokens \\\n", - "15770 ietg 3 \n", - "87400 medinet 3 \n", - "64978 lakeinvest 3 \n", - "77176 alloys ireland 3 \n", - "21550 alliance cyber defence 3 \n", - "\n", - " tf_name_unusual_tokens_l tf_name_unusual_tokens_r \\\n", - "15770 3.618927e-07 3.618927e-07 \n", - "87400 5.428390e-07 5.428390e-07 \n", - "64978 3.618927e-07 3.618927e-07 \n", - "77176 3.618927e-07 3.618927e-07 \n", - "21550 3.618927e-07 3.618927e-07 \n", - "\n", - " bf_name_unusual_tokens bf_tf_adj_name_unusual_tokens postcode_area_l \\\n", - "15770 9.073085e+06 0.27795 LS \n", - "87400 9.073085e+06 0.18530 N \n", - "64978 9.073085e+06 0.27795 LA \n", - "77176 9.073085e+06 0.27795 ML \n", - "21550 9.073085e+06 0.27795 E \n", - "\n", - " postcode_area_r gamma_postcode_area bf_postcode_area \\\n", - "15770 LS 1 1.0 \n", - "87400 NaN -1 1.0 \n", - "64978 LA 1 1.0 \n", - "77176 ML 1 1.0 \n", - "21550 E 1 1.0 \n", - "\n", - " name_unusual_tokens_first5_l name_unusual_tokens_first5_r match_key \n", - "15770 ietg ietg 0 \n", - "87400 medin medin 1 \n", - "64978 lakei lakei 0 \n", - "77176 alloy alloy 0 \n", - "21550 allia allia 0 " - ] - }, - "execution_count": 58, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_predict = predictions.as_pandas_dataframe()\n", - "df_predict.sample(5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "87457d67-ac21-4fa9-a590-70d7170a6c34", - "metadata": {}, - "outputs": [], - "source": [ - "clusters = linker.cluster_pairwise_predictions_at_threshold(predictions, threshold_match_probability=0.5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "291bd9c8-6def-4614-914f-dfc2445076dd", - "metadata": {}, - "outputs": [], - "source": [ - "df_cluster = clusters.as_pandas_dataframe()\n", - "df_cluster.sample(5)" - ] - }, - { - "cell_type": "markdown", - "id": "27c01ad5-d68b-4f65-9c0c-90279525615c", - "metadata": {}, - "source": [ - "## Blocking rules\n", - "\n", - "I've pretty much got Sarah's code working, but it currently takes 45 mins to run the whole pipeline end to end, with the bulk of that time being `predict`, which takes 41 mins.\n", - "\n", - "Splink is very, very clear about tweaking performance via blocking [in the tutorial](https://moj-analytical-services.github.io/splink/demos/03_Blocking.html):\n", - "\n", - "> Blocking rules are the most important determinant of the performance of your linkage job.\n", - "\n", - "We need to investigate how to optimise this, especially as we move into using more than two datasets.\n", - "\n", - "To reiterate the goals of this process, our rules need to:\n", - "\n", - "1. Eliminate enough non-matching comparison pairs so your record linkage job is small enough to compute \n", - "2. Eliminate as few truly matching pairs as possible (ideally none)\n", - "\n", - "So let's get to it." - ] - }, - { - "cell_type": "markdown", - "id": "b9fe131b-c0b8-48d9-9c9f-8731f008c386", - "metadata": {}, - "source": [ - "Let's start with the rules in Sarah's code, many of which were commented out:\n", - "\n", - "* `\"l.comp_num_clean = r.comp_num_clean\"`\n", - "* `\"l.name_unusual_tokens = r.name_unusual_tokens\"`\n", - "* `\"l.name_unusual_tokens_first5 = r.name_unusual_tokens_first5\"`\n", - "* `\"l.name_unusual_tokens_last5 = r.name_unusual_tokens_last5\"`\n", - "* `\"l.secondary_name_unusual_tokens = r.secondary_name_unusual_tokens\"`\n", - "* `\"l.secondary_name_unusual_tokens = r.name_unusual_tokens\"`\n", - "* `\"r.secondary_name_unusual_tokens = l.name_unusual_tokens\"`\n", - "* TODO: blocking rule on first token name_unusual_tokens?\n", - "\n", - "Note that Splink will generate all comparison pairs that meet ANY of these rules. So as unique values rise, so do the amount of things that must be compared." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "a4ae2201-e6ab-46ca-b472-250b421e9b42", - "metadata": {}, - "outputs": [], - "source": [ - "settings_2 = {\"link_type\": \"link_only\"}\n", - "\n", - "linker_2 = DuckDBLinker(\n", - " [df_dh_clean, df_ch_clean],\n", - " settings_2,\n", - " input_table_aliases=[\"datahub\", \"companies_house\"],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "a71ab9a0-ebdf-4b0a-8cfc-2367f9446301", - "metadata": {}, - "outputs": [], - "source": [ - "blocking_rules = {\n", - " 'blocking_rule_1': \"l.comp_num_clean = r.comp_num_clean\",\n", - " 'blocking_rule_2': \"l.name_unusual_tokens = r.name_unusual_tokens\",\n", - " 'blocking_rule_3': \"l.name_unusual_tokens_first5 = r.name_unusual_tokens_first5\",\n", - " 'blocking_rule_4': \"l.name_unusual_tokens_last5 = r.name_unusual_tokens_last5\",\n", - " 'blocking_rule_5': \"l.secondary_name_unusual_tokens = r.secondary_name_unusual_tokens\",\n", - " 'blocking_rule_6': \"l.secondary_name_unusual_tokens = r.name_unusual_tokens\",\n", - " 'blocking_rule_7': \"r.secondary_name_unusual_tokens = l.name_unusual_tokens\"\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "84a0b6bf-67a0-48bd-8901-501854f6f04b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2023-06-16 14:03:09.506043: Evaluating blocking_rule_1\n", - "2023-06-16 14:03:09.560729: Finished evaluating blocking_rule_1\n", - "2023-06-16 14:03:09.560762: Evaluating blocking_rule_2\n", - "2023-06-16 14:03:09.588338: Finished evaluating blocking_rule_2\n", - "2023-06-16 14:03:09.588370: Evaluating blocking_rule_3\n", - "2023-06-16 14:11:22.168216: Finished evaluating blocking_rule_3\n", - "2023-06-16 14:11:22.168313: Evaluating blocking_rule_4\n", - "2023-06-16 14:23:40.211857: Finished evaluating blocking_rule_4\n", - "2023-06-16 14:23:40.211948: Evaluating blocking_rule_5\n" - ] - }, - { - "ename": "SplinkException", - "evalue": "Error executing the following sql for table `__splink__analyse_blocking_rule` (__splink__analyse_blocking_rule_d7bbebb07):\nCREATE TABLE __splink__analyse_blocking_rule_d7bbebb07 AS\n(\n WITH __splink__df_concat AS (\n SELECT\n 'datahub' AS source_dataset,\n \"unique_id\",\n \"comp_num_clean\",\n \"name_unusual_tokens\",\n \"secondary_name_unusual_tokens\",\n \"names_tokens_stopwords\",\n \"postcode\",\n \"postcode_alt\",\n \"name_unusual_tokens_first5\",\n \"name_unusual_tokens_last5\",\n \"postcode_area\"\n FROM datahub\n UNION ALL\n SELECT\n 'companies_house' AS source_dataset,\n \"unique_id\",\n \"comp_num_clean\",\n \"name_unusual_tokens\",\n \"secondary_name_unusual_tokens\",\n \"names_tokens_stopwords\",\n \"postcode\",\n \"postcode_alt\",\n \"name_unusual_tokens_first5\",\n \"name_unusual_tokens_last5\",\n \"postcode_area\"\n FROM companies_house\n )\n SELECT\n COUNT(*) AS count_of_pairwise_comparisons_generated\n FROM __splink__df_concat AS l\n INNER JOIN __splink__df_concat AS r\n ON l.secondary_name_unusual_tokens = r.secondary_name_unusual_tokens\n WHERE\n l.\"source_dataset\" || '-__-' || l.\"unique_id\" < r.\"source_dataset\" || '-__-' || r.\"unique_id\"\n AND l.\"source_dataset\" <> r.\"source_dataset\"\n)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: ", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m/opt/conda/envs/lead_generation_experiments/lib/python3.9/site-packages/splink/linker.py:632\u001b[0m, in \u001b[0;36mLinker._log_and_run_sql_execution\u001b[0;34m(self, final_sql, templated_name, physical_name)\u001b[0m\n\u001b[1;32m 631\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 632\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_sql_execution\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal_sql\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtemplated_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mphysical_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 633\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 634\u001b[0m \u001b[38;5;66;03m# Parse our SQL through sqlglot to pretty print\u001b[39;00m\n", - "File \u001b[0;32m/opt/conda/envs/lead_generation_experiments/lib/python3.9/site-packages/splink/duckdb/linker.py:221\u001b[0m, in \u001b[0;36mDuckDBLinker._run_sql_execution\u001b[0;34m(self, final_sql, templated_name, physical_name)\u001b[0m\n\u001b[1;32m 220\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_run_sql_execution\u001b[39m(\u001b[38;5;28mself\u001b[39m, final_sql, templated_name, physical_name):\n\u001b[0;32m--> 221\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_con\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal_sql\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mRuntimeError\u001b[0m: Query interrupted", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mSplinkException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[35], line 9\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m rule \u001b[38;5;129;01min\u001b[39;00m blocking_rules\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdatetime\u001b[38;5;241m.\u001b[39mdatetime\u001b[38;5;241m.\u001b[39mnow()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: Evaluating \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrule\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 9\u001b[0m count \u001b[38;5;241m=\u001b[39m \u001b[43mlinker_2\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcount_num_comparisons_from_blocking_rule\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblocking_rules\u001b[49m\u001b[43m[\u001b[49m\u001b[43mrule\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdatetime\u001b[38;5;241m.\u001b[39mdatetime\u001b[38;5;241m.\u001b[39mnow()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: Finished evaluating \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrule\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 13\u001b[0m rule_counts[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrule\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mappend(rule)\n", - "File \u001b[0;32m/opt/conda/envs/lead_generation_experiments/lib/python3.9/site-packages/splink/linker.py:2724\u001b[0m, in \u001b[0;36mLinker.count_num_comparisons_from_blocking_rule\u001b[0;34m(self, blocking_rule)\u001b[0m\n\u001b[1;32m 2722\u001b[0m sql \u001b[38;5;241m=\u001b[39m number_of_comparisons_generated_by_blocking_rule_sql(\u001b[38;5;28mself\u001b[39m, blocking_rule)\n\u001b[1;32m 2723\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_enqueue_sql(sql, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__splink__analyse_blocking_rule\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 2724\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execute_sql_pipeline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mas_record_dict()[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 2725\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcount_of_pairwise_comparisons_generated\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n", - "File \u001b[0;32m/opt/conda/envs/lead_generation_experiments/lib/python3.9/site-packages/splink/linker.py:574\u001b[0m, in \u001b[0;36mLinker._execute_sql_pipeline\u001b[0;34m(self, input_dataframes, materialise_as_hash, use_cache)\u001b[0m\n\u001b[1;32m 567\u001b[0m dataframe \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sql_to_splink_dataframe_checking_cache(\n\u001b[1;32m 568\u001b[0m sql_gen,\n\u001b[1;32m 569\u001b[0m output_tablename_templated,\n\u001b[1;32m 570\u001b[0m materialise_as_hash,\n\u001b[1;32m 571\u001b[0m use_cache,\n\u001b[1;32m 572\u001b[0m )\n\u001b[1;32m 573\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 574\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 575\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 576\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_pipeline\u001b[38;5;241m.\u001b[39mreset()\n", - "File \u001b[0;32m/opt/conda/envs/lead_generation_experiments/lib/python3.9/site-packages/splink/linker.py:567\u001b[0m, in \u001b[0;36mLinker._execute_sql_pipeline\u001b[0;34m(self, input_dataframes, materialise_as_hash, use_cache)\u001b[0m\n\u001b[1;32m 564\u001b[0m output_tablename_templated \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_pipeline\u001b[38;5;241m.\u001b[39mqueue[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;241m.\u001b[39moutput_table_name\n\u001b[1;32m 566\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 567\u001b[0m dataframe \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sql_to_splink_dataframe_checking_cache\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 568\u001b[0m \u001b[43m \u001b[49m\u001b[43msql_gen\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 569\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_tablename_templated\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 570\u001b[0m \u001b[43m \u001b[49m\u001b[43mmaterialise_as_hash\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 571\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 572\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 573\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 574\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n", - "File \u001b[0;32m/opt/conda/envs/lead_generation_experiments/lib/python3.9/site-packages/splink/linker.py:803\u001b[0m, in \u001b[0;36mLinker._sql_to_splink_dataframe_checking_cache\u001b[0;34m(self, sql, output_tablename_templated, materialise_as_hash, use_cache)\u001b[0m\n\u001b[1;32m 800\u001b[0m \u001b[38;5;28mprint\u001b[39m(sql)\n\u001b[1;32m 802\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m materialise_as_hash:\n\u001b[0;32m--> 803\u001b[0m splink_dataframe \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execute_sql_against_backend\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput_tablename_templated\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtable_name_hash\u001b[49m\n\u001b[1;32m 805\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 806\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 807\u001b[0m splink_dataframe \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_execute_sql_against_backend(\n\u001b[1;32m 808\u001b[0m sql,\n\u001b[1;32m 809\u001b[0m output_tablename_templated,\n\u001b[1;32m 810\u001b[0m output_tablename_templated,\n\u001b[1;32m 811\u001b[0m )\n", - "File \u001b[0;32m/opt/conda/envs/lead_generation_experiments/lib/python3.9/site-packages/splink/duckdb/linker.py:216\u001b[0m, in \u001b[0;36mDuckDBLinker._execute_sql_against_backend\u001b[0;34m(self, sql, templated_name, physical_name)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_delete_table_from_database(physical_name)\n\u001b[1;32m 211\u001b[0m sql \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 212\u001b[0m \u001b[38;5;124mCREATE TABLE \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mphysical_name\u001b[38;5;132;01m}\u001b[39;00m\n\u001b[1;32m 213\u001b[0m \u001b[38;5;124mAS\u001b[39m\n\u001b[1;32m 214\u001b[0m \u001b[38;5;124m(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msql\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;124m\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[0;32m--> 216\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_log_and_run_sql_execution\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtemplated_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mphysical_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m DuckDBLinkerDataFrame(templated_name, physical_name, \u001b[38;5;28mself\u001b[39m)\n", - "File \u001b[0;32m/opt/conda/envs/lead_generation_experiments/lib/python3.9/site-packages/splink/linker.py:644\u001b[0m, in \u001b[0;36mLinker._log_and_run_sql_execution\u001b[0;34m(self, final_sql, templated_name, physical_name)\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[1;32m 642\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m--> 644\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SplinkException(\n\u001b[1;32m 645\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError executing the following sql for table \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 646\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtemplated_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` (\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mphysical_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m):\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mfinal_sql\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 647\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n", - "\u001b[0;31mSplinkException\u001b[0m: Error executing the following sql for table `__splink__analyse_blocking_rule` (__splink__analyse_blocking_rule_d7bbebb07):\nCREATE TABLE __splink__analyse_blocking_rule_d7bbebb07 AS\n(\n WITH __splink__df_concat AS (\n SELECT\n 'datahub' AS source_dataset,\n \"unique_id\",\n \"comp_num_clean\",\n \"name_unusual_tokens\",\n \"secondary_name_unusual_tokens\",\n \"names_tokens_stopwords\",\n \"postcode\",\n \"postcode_alt\",\n \"name_unusual_tokens_first5\",\n \"name_unusual_tokens_last5\",\n \"postcode_area\"\n FROM datahub\n UNION ALL\n SELECT\n 'companies_house' AS source_dataset,\n \"unique_id\",\n \"comp_num_clean\",\n \"name_unusual_tokens\",\n \"secondary_name_unusual_tokens\",\n \"names_tokens_stopwords\",\n \"postcode\",\n \"postcode_alt\",\n \"name_unusual_tokens_first5\",\n \"name_unusual_tokens_last5\",\n \"postcode_area\"\n FROM companies_house\n )\n SELECT\n COUNT(*) AS count_of_pairwise_comparisons_generated\n FROM __splink__df_concat AS l\n INNER JOIN __splink__df_concat AS r\n ON l.secondary_name_unusual_tokens = r.secondary_name_unusual_tokens\n WHERE\n l.\"source_dataset\" || '-__-' || l.\"unique_id\" < r.\"source_dataset\" || '-__-' || r.\"unique_id\"\n AND l.\"source_dataset\" <> r.\"source_dataset\"\n)" - ] - } - ], - "source": [ - "rule_counts = {\n", - " 'rule': [],\n", - " 'count': []\n", - "}\n", - "\n", - "for rule in blocking_rules.keys():\n", - " print(f'{datetime.datetime.now()}: Evaluating {rule}')\n", - " \n", - " count = linker_2.count_num_comparisons_from_blocking_rule(blocking_rules[rule])\n", - " \n", - " print(f'{datetime.datetime.now()}: Finished evaluating {rule}')\n", - " \n", - " rule_counts['rule'].append(rule)\n", - " rule_counts['count'].append(count)\n" - ] - }, - { - "cell_type": "markdown", - "id": "6838681c-56c8-41c3-a59c-e5086b448b65", - "metadata": {}, - "source": [ - "Rule 5 (and likely 6) took 90 mins and counting to evaluate. It's not going to work for us.\n", - "\n", - "Interesting that from the error we can see the `union all` that powers the counts." - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "8a145c84-63cf-4d10-8396-dc6e119a3f1c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
rulecount
0blocking_rule_190528
1blocking_rule_2124344
2blocking_rule_3791167934
3blocking_rule_41265705234
\n", - "
" - ], - "text/plain": [ - " rule count\n", - "0 blocking_rule_1 90528\n", - "1 blocking_rule_2 124344\n", - "2 blocking_rule_3 791167934\n", - "3 blocking_rule_4 1265705234" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.DataFrame.from_dict(rule_counts)" - ] - }, - { - "cell_type": "markdown", - "id": "07b29cc9-c3ac-4d7d-92d5-23596c0a433a", - "metadata": {}, - "source": [ - "Let's test the cumulative function." - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "ba916bb1-e9dc-4321-bae3-ae9003cb5bc8", - "metadata": {}, - "outputs": [], - "source": [ - "blocking_rules_2 = {\n", - " 'blocking_rule_1': \"l.comp_num_clean = r.comp_num_clean\",\n", - " 'blocking_rule_2': \"l.name_unusual_tokens = r.name_unusual_tokens\"\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "422053d5-19c9-496b-a22c-4b08398bc6a4", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.vegalite.v4+json": { - "$schema": "https://vega.github.io/schema/vega-lite/v5.json", - "data": { - "values": [ - { - "cartesian": 941960454400, - "cumulative_rows": 90528, - "reduction_ratio": "The rolling reduction ratio with your given blocking rule(s) is 1.0. This represents the reduction in the total number of comparisons due to your rule(s).", - "row_count": 90528, - "rule": "l.comp_num_clean = r.comp_num_clean", - "start": 0 - }, - { - "cartesian": 941960454400, - "cumulative_rows": 132255, - "reduction_ratio": "The rolling reduction ratio with your given blocking rule(s) is 1.0. This represents the reduction in the total number of comparisons due to your rule(s).", - "row_count": 41727, - "rule": "l.name_unusual_tokens = r.name_unusual_tokens", - "start": 90528 - } - ] - }, - "encoding": { - "color": { - "field": "rule", - "legend": null, - "scale": { - "scheme": "category20c" - } - }, - "order": { - "field": "cumulative_rows" - }, - "tooltip": [ - { - "field": "rule", - "title": "SQL Condition", - "type": "nominal" - }, - { - "field": "row_count", - "format": ",", - "title": "Comparisons Generated", - "type": "quantitative" - }, - { - "field": "cumulative_rows", - "format": ",", - "title": "Cumulative Comparisons", - "type": "quantitative" - }, - { - "field": "cartesian", - "format": ",", - "title": "Cartesian Product of Input Data", - "type": "quantitative" - }, - { - "field": "reduction_ratio", - "title": "Reduction Ratio (cumulative rows/cartesian product)", - "type": "nominal" - } - ], - "x": { - "field": "start", - "title": "Comparisons Generated by Rule(s)", - "type": "quantitative" - }, - "x2": { - "field": "cumulative_rows" - }, - "y": { - "field": "rule", - "sort": [ - "-x2" - ], - "title": "SQL Blocking Rule" - } - }, - "height": { - "step": 20 - }, - "mark": "bar", - "title": { - "subtitle": "(Counts exclude comparisons already generated by previous rules)", - "text": "Count of Additional Comparisons Generated by Each Blocking Rule" - }, - "width": 450 - }, - "image/png": "iVBORw0KGgoAAAANSUhEUgAAApcAAACPCAYAAACvdvBOAAAAAXNSR0IArs4c6QAAIABJREFUeF7t3QeUZUX1NfAaEUWQHCQHA0GyiCRJEgQBCSIZJAfJCEpOkoNkyYLkDKKAJAFBBREQREDJEgQkKBklfOtX/1XzXd7c+/r1TE/36+5Ta80amHdv3apdp6p27XOqasQCCyzwUYoUCAQCgUAgEAgEAoFAIBAI9AECI4Jc9gGKkUUgEAgEAoFAIBAIBAKBQEYgyGUYQiAQCAQCgUAgEAgEAoFAnyEQ5LLPoIyMAoFAIBAIBAKBQCAQCASCXIYNBAKBQCAQCAQCgUAgEAj0GQJBLvsMysgoEBh9BKaaaqo044wzpgceeCD997//Hf2MBujNz372s2nSSSdNzzzzTGMJPvGJT+TfPvzwwz4t5fjjj598/6233sp/WlNPv/dlYTqt4wQTTJDmnHPO9Le//S395z//6csijNW8+hPL0a3ItNNOm77yla+kZ599Nj344IPp/fffH92s4r2UUrHpOjD6ui/3BHhrWeq+32kf7OlbPf1uzJZeeumlnh7t9e91mPcW6/7CoalyQS573ezxQiDQdwgsvPDC6fDDD0/IRkkvvvhi2mKLLdLzzz/fdx9qyGnmmWdOG2+8cXryySfTz3/+89H63pVXXplmmGGGTBq/9rWv1ebxhS98IV188cX5t3/84x9pjTXWaPut3/zmN2miiSZKe+65Z7rhhhtGebb6+9JLL52WW2659Kc//SltvfXW6Rvf+EZaYokl0q233pr/HHrooR/7fbQq2eFL11xzTfrc5z6XDjvssHTZZZeN8tbUU0+dfvrTn+aFREnvvPNO2muvvdJvf/vbDr8ycI/1J5a9reU888yTjj322Gw3JSGWp512WvrZz37W2+zG+vObbrpptoMzzzyz7aKsriCf+tSn0u9///v801e/+tWPPaL++ke7/thp5cYbb7x0xx13ND7e+u1O8y3P3X333WnEiBGj1KEun+9973tp++23H+Wnt99+O5144onpkksuyb+VPBdddNExWqivssoqab/99stts/rqq4/yXeONZKxRhr5MxgILuWrSni+88ELadtttO7KXnsaivixvXV5BLsc2wpF/INCAAMJ10UUX5cH1o48+ymRymmmmyUrB//73v7TkkkuO0eDYCfDf+ta30oEHHphefvnltMIKK3Tyysee+fznP58HdeW/7rrr0r777lubx1FHHZWWWmqpkb8tv/zy6dVXX238Xm/I5RRTTJG+853vpBtvvDGdcsop6eijj87Y3XzzzelHP/pRWm+99T72e68r2YsXyoB+xBFHjJzsyuuf/OQncxknnHDC/E///Oc/0+STT54QBQkxLhNWLz7Zr4/2J5a9qVi1L33wwQfpL3/5S5pyyinTdNNNl7NhExdeeGFvshzrz1o0TTbZZI0LqHYFaEcuJ5544mz7fUEuEZyy6EFsquqZPr/qqquOEU7F3jshqYVcKgNV2rgJv7IwX3bZZdO///3vPiOXCKVFn36KaLYm9qQd1l133T4fpwu55NXwx9hhYWpu8P/LLLNMj7i3G4t6fLkPHghy2QcgRhaBwOggcPnll6eZZpopvf766+nb3/52evPNN/Ngddttt6Vxxx03nXrqqen0009PVuD7779/djsb0BHBH/7wh9nlh8wZaH7xi1+kn/zkJ1kJOffcc3OeBsQdd9wxrbbaaun+++9Ps846ax6Mn3vuufTjH/84TxRW/CYQ+T766KOZiLWmLbfcMm2wwQbpM5/5TDJxe46C4FsnnHBCHty9zz200kor1UJBZVE3Ln9/U/WoeyUhh9ttt10ui/JxOVFNinLZ7vfZZpstrbnmmummm27KBH3zzTfP+CHov/71rzOJLb+rt+8jf1RW//3uu+9mpQfGMJGPev71r39NlGXqV/ldeZHwXXfdNStk7733XlZi99lnn/TEE0+kdgM6ovvd7343Y+Xvp556KlcfFhRk7UlFbtfeVFnf8q7vI0+vvfZaOuigg9Iee+yRCZUJVvv8/e9/z4sXCxblR+5h6js77bRTDiFAyhB/zygXAsHu4Ka8Fh/s0eTPpn73u999DEvl3n333RPV0GQP/4MPPjjde++9uW5NtqOM7Wzzz3/+c8akXd5VQ7viiiuyPfo+ey8kiGr55S9/OYcfbLbZZslChLr5xS9+Mb/OSwArbdhTeUzscKZUsRt4HHnkken666/PixmLNDajn04//fRp8cUXz/105ZVXzrjr337/wQ9+kDFfZJFFMmZUL/n84Q9/aCybsspf+48zzjjpnnvuybYptRKzKrnUDvPOO2+2YYu/Qw45JHtKvHvXXXfl8klXX3118p7+po1LqpJLdawLO9H/jT1zzz13Lpv+ZhHl3yTvIWmTTDJJxkDoz957753/u5DL22+/PePBBimlu+222yjjSCGX1YUw0gU3OLJp77Yql+1s0Ee+//3vp3XWWSe30b/+9a9s78aHVnJJxVQX5UYoYeb7vCbbbLNN4zjLliW2o/3Ykf5VxINvfvObo9S1kMvqItXYYIyUjF0wbDf2t45FQkWUQR9gD2yRCjq2wrCCXI4OK4h3AoE+QKAQLgqGQb2kaqyMCRMJNXgaBPxmQDN5Uv8QNJPLLbfckgdkROv888/PxMpgbXAyoEkGFIO/vBBEk9Vxxx2XCaf8DIIG4mpCNnfZZZf8TyZTqpv3Ecmdd945T4YIjfcff/zxPOi2pqKOKr+ybbLJJjmvUq655pornX322fk1pE4dfUOCC8LQ7veqW9ygbLIwUXA3I93UwarbHHlaYIEFcv5UAJOqVNqhqh4invKSYIFAajdlNMmZWJHu4jprRy7VXfvADibVVNocKWnX3tQZhKtgpT1L0gYlnxJ6YKIsyk4h9p43wXPLmiDFKCI46lpsweR1/PHHZ6JbkjyRtCqWVGvqtTbyberKK6+8kkyY7WxH/dvZJjtql3erjd155525X3Dbw68pXXvttXnhon/4gzz5GxFEvJr6ivL4XZ9DgNgNsiRRnIWFIFAlsWNkx+LL89SvojwhwmKEtSXMkDFk0/NNZUMCNtxww5y98lo8ldRELv3u2xZAxYaRSd+wODIeIJllzGA/2rsao1oll9q/qlwicvo/0mUh6z22zZ4kY4m6XXrppbk/I6by899lIVXta2ywuILr2rGQS9g+9thj+Rsw1X/Z7te//vX8b1VyaVHZNH6xQYtmuEtvvPHGSK+CMUL/LsolJdiz8PQ8Al51i1uYtrMd5BXmUnVMkd+CCy44irkWcmnh6dtwQSxnmWWWkWMnL027sb86FiH7CK3xQj2NWfqL8WyttdZq7C9j8kOQyzFBL94NBMYAgTI4UdOQoLpk9U8pKRO2AUEcocnCoG2w6YRcGuQpOoicmL/iMuvJLV5cd4W8IrsmRwk5QTYRVAMWkleXuI++9KUvZVJGaaUMSpQkiir11CRHNeBmQ2xMWhJySdVt93trzGWrW7waJ2iARkQkEwfVySToj0lroYUWGjlpUODEkxYC5u+zzjork3gTjwGbSkoZMjFqp3bk0uBONYDDDjvsMFrtjcAjlyYoSqQ2UHYTO+JDOaaMlfIUcqnsFhPUOSSltL/FiYnrnHPOycRio402yuUyUVI5StwaZQ0WrTGXhdSZ7NXPZI/EsjHfRFabbMdk7TtNttkub/UvSZ8obVpCC4pdl2fgoa0uuOCC/E/w928maO/rZ/PNN1/b8lDIkDoLHcQKqUIqqYjqXsjleeedl+vPHtguIkVpRi7gQU3UZlW3uN/blU3ZKdX6vny0ZVmgtCOXFlp//OMf0xlnnJHr57/VHTlCNNiDNtDv9EV9spqq5LLVYB966KFsLzCnDusPFF0EVt76sE1rFiPFtVxUeeMZwl7GQPVhL7/85S+zim6MQearqSnm0jMlP7ZUJZe/+tWv2togFdfi2KLD2AQPfQg+FmDa1LhQFnHqxgsg1ZHLJlvmTZpjjjlyHdUV4bX46olctmLueSqpb/eGXCL8+huCr47+XzkknhjjWV+nIJd9jWjkFwh0iECZrEpsYHlt/vnnz5tCHnnkkewuQyANaMWFZeONQbsar1QmcAOYgaxVuSyEhmphIC2DWk/kskzw1Y01JlbkFomjZrQjlyZE9aNWmGAQSASXYsMtR5EpLs0ywMOhqLq+a/JCapt+7w25pMYhUhISi2QZaA36EkJQYszK7wgllx+3IkLh/dlnn/1jrdwJuUQehCa0xrdy/8qP6sO13a69EXPksiw2KIRIcInDQhQoglRbalwhl4VUc9kiAaWuiIbNVa27U7nbLHqQgbJRyjut5FL7VDdnsSs2w9Xck+2wc+Vtss12ebd2MbaEAJSNVFyAwgfYH2ULHtRB/1aXrrrqqvxsU3ksOhCzukS1ZSOIiMmbu1NiV2ylKJzl3fvuuy9v2KuSS0pSu7JRBgsZRHzYTNkw1s4tXjbYFWJWPAbCbeDPHc32ECyEp3VTWZVcaufqxhWYqi8Flq1U1VR1NS6wT/mX8akVvzKGlUVMGduQzAMOOOBjj5c6FALnR/1J6AP7LaE2VXKpPhYPTeMXO0WIy0Kz+sHiFq/+WzWfOnLZZMtlPNPf/Hc1Vr2dcsnD9PTTT+dFaQk7KN9oJZetY391oUvVrXohqnUaW7HeQS47JALxWCDQ1wgUUmXApnCIQzMhUTGRMStULmQTRHWHNbJmwkS2TIgGDgOq57mBkI9WcmmQNXn0llyaFKiTXI2IBXce9USyAi6TSJNyWZSyOuyobQY8O3kRzocffjiragZ7EygMDOZW+O1+7w25pOohXBI1SChAmUQK4W7dZFAll+JBvUedQ/ZNEkUF60m5RBzFjkrrr79+djGra1FXYEwJa9fe1JTRIZdlsi6LCXVVlqJCWyggaGXHbZVcVolBlVxS0KjhbBHx0ZYllhGZ9Ww726GWIXN1tqlN2+VdlMpiV0VdRrrVS1ycJOZUmRAhtoiIqLtFDfVTfxM2oP8oc1N5EADkEonhMoUVom7S1zeFMyAohfT7dlGrhEywY+5U4Rh15NKCo13ZLEws6CiQSEVZVPhOO+WSy5Prs2yoo7JutdVWua0stEqoRNWtXO2rncRcFuJkXGJHSJ73kEv2jJQWNziFUyyvWFf9p7WvdUIuW2Mu2Y9+VOyoSi55JtrZoPHAwvXkk0/Ou/bhRbm0gC5ucQtQbSg+nh2xTWNXHblsGmdLXLV2pJLzOFCVe1IuqzGXhAbfLjZGGW439lfJpT6gHSwuShiAfzPGeq7qCeireS7IZV8hGfkEAr1EgDvVgCaVDTEUS8lmDKRK3KS4LckkRh3gNpK4lQw2XGzet8KlaEidkkuuVO4ng6WYwPKtUhVqChIgfwHgyJSJwyBLoVG+dsplUWdMxtzfkonAJGlgozSZsIsqyz2IAJfYK5My5afd763kkqsLOaLmGYBNpNU4QeTdRhgTK3WYCowctcaBlUm7Si6VD4m3IEBcxFIpayfKpXoj5mV3OKUSOVF/ZUFOkZh27c1mRodcmiCRd+qG7zl6ymKDq1vbIvhsARmQ4EU5gl0TuTQ5moTVx4ROZYGH/ClzNrK0sx1t30TmKIXt8kbQqqkat6s+yAAXfcG6kIJCRPxuoVA2xehrFg3tyGUhqtqaPZvY9Ud92EaoVnLJfa5csEZELPzYfHE/lzAJfZ3NIjjKW1c2hMCiQ908jxgUV207cmkcYLOFSGhTCw2JakkxlVq9JwXbKrmsO27HAgNZQnz1dc+X2Ed2zOPCxiSYIZfIXvFajA65hIFNbJK8imJq8WBRWiWXiGw7G9Tmxk84KYv+R+lkm9zhJeaSS1n9YF48KL0hl2UzjrJTXsWBl3G/nXJZJZcWRMSGoo6XcJ6msb9KLi1ky1xjkSQ8g00glcad3p6h2clUF+SyE5TimUBgLCEg3oWrtepSQjpMziY/iTJG0SubXAwmCJ3YLsol9am43rjoqDFN5BKZMamVFbPJgKu17ORudZ0YaCkl4rVKMiBxJ5ngF1tssVyW6gad8pw4y3L8i9gsMVolFaXJxGvndHXzCKKr/Ca+4oZq93shl0W9NVhSapAc5NFkjSyV3+Fj4jfAlgQ3bjeTVuuEh3jZDc0tzv1X1BDvyhshgyeljfrU7pxLu7ORiOq3EUs7P0ssV7v2pspRQIt6URYHxS0uDlPdW2MutU85/5HKrK4WK6Vu6uLfkQS2iDAgXk3ksmApH261qv0iLTZ79WQ7ZUNPUXtabbNd3nXdscSJFkLpGUovVZEtCRMom9wKqdJuFg9iJHsqjzajrJXFnfwpgRYb3NYWYlXlEumzoIKDJCzEu6VtLJjKZgoTPxVdGerKRh3Wl8rB3b5TCErTOZfqhlSXhRpSiiQVIlE2KCnb2muvnTfktaaezrm00LEhCHkyPiFk7Mh4VFTSEgNd8hYag8zX9bVCyEtsYrU8TTGX6kgR1K+kKrlU13bjF08MXKttShWnYiOl2tTYwLOkXxrHiupfNt4Zb8qGniZbZpNszOJQMu5ZqDcdF1W3W1wZylFvygbndmN/6zmXyCgyXUJgivel3TmmYzLtBbkcE/Ti3UCgjxBAxBAPZKgcUVPN2uAkjgzxMgm13jpSdmiOzsHrBhuDK0LXdNuEyQLBlL9JamwkEyfXk0mpbiXd0++teFGuyjlxdeVFCuGO9FKeepMoMNxmo3s7h7rA03cppq317am9Oy1riblEAJ0NiGC2th/bMcmOSbtaZCCYsGzFZExtp13edTjoR9qWu7bumBX2TsGFBVvr7VEs8kYMKJBCWdol39JvtXGd61EZ4AMzv/dUNkTOM1ToThM8EJHWdilkrUqIO82z9TkEVp+wWamu7wohQKy46C1q+jv1ZINc45Q8C+aiivZlGS0itAObtPiycUp4AhXTAnFMUm/GfuOK0Az2oK+ODcWy1CXI5Zi0arwbCAQCgUAXI1All91+QHsXwzjkilYUQhWjqHXjDUZDCXSKZzkhonoUUesxdEOpzkEuh1JrRl0CgUAgEKggwOUrvlS8lpjISIEABLjhhQtQ0lp3ZQdCYwcBMaHiUYXNULwpmDbpDNUU5HKotmzUKxAIBAKBQCAQCAQCgQFAIMjlAIAenwwEAoFAIBAIBAKBQGCoIhDkcqi2bNQrEOgAARs5bCqobgSyM9URKnZCD0TwfQfFHrBHyq76AStAfLjPEBjObanuNgXWbSSr+/c+Az0yyggIVbFxqO54p6ECUZDLodKSUY9AYDQQcAadsxcdJ2TXoZs7ylmbsrPb2nmGzknr6+RoDTuM290F3dffLPk5c9BRMOUGk06/45gTRwGVKyw7fa9bn3PElWOleovD2K7P6LZPp+VyRqsjclqP8en0/W55zpE54ibt0u5NckzNX/7yl5H31Jd3HcPj2K9yFWVv8hzbz5ZjvhzD0w3ppJNOyqcSGMd6mxw95uYiR0MN1RTkcqi2bNQrEOgBARO4g7TLoccOy3ZWncBzR3I4E9DhzpKz3Po6ucGEguKMvf5OjgZxR3jdAcbtyjLUyKUFBQya7rbv73Yp3xvd9um0vEOFXCKWbn9xLmlv0mAkl47ucuSROndDssseuXTGaW9TuVHI+ZjOHR6KKcjlUGzVqFMg0AECBjVn9bnFws0qbtsod1CX15FKh0RTLz/96U/nQ8KdiemwZMdoONTXGXYOPXZAL4XTYdPIClWMe937XOzuMnaWpoOinSvnnD3JHeuO6aCYIDrO8XP+oLK0uu2cFec5dyG74s9NGsrhDmffpSJ4X3kee+yxnC+yrH5u13BQtAPqXYNYyKWyer7cYOG/TdjyREIccu9ga2cVqoODoymXbqCRh9+cV2lHtnq2prrvKztcYES9dd6dg8edPeegdveLC1egItvljdSqg2QHuHZw04rDmE26Drv2DBUSZq4HdKuLg7btTHUjkqsdnannO9qJWnnQQQdljBzmj2g6gN15f1yj6u8AaoeAw9z5eBYfyqPsvuNgeVj4HtzZlAlT6qQ9m8pZJZdsjL1oX9+0KKL8aEPfpMDB3nmVTfkpj3ZwYL9DsNkB22QvF198ccacHUlu4tGO2raaYGshpAyOdXLmpKsCpbryIBDt8Gmyn7r6OozdeZnwd1akvuMaSmdH6otUNO3VhIszX9mM81Wdd1luyWHL1US5ZFNu+tJfLDJhq4+4VefAAw8cWV/206raNdmk+8erdudw/rr6K5d6uRmn3HPu5jD9y0UA+oOxqNSndSxyxaEzXcvZkS4dMC6tuOKKjfZdrX9r/9AfmvKrkkuk1/9TI/UdeJWd4E39YEzIaQfD+4A/EuRywJsgChAIDAwC5UpGCiKSYWBt5x41cbhFxSRmEkAAXPFmAnUlWrmr28Dvvmz3Z8vPpO7AcZMEYmdy96yrGREXE4lJwm0YBx98cJp11llz3o5IkU9JiIRzGxFUBMuNLHPPPXeepEyGJmCDukkdWXILCMLE/en2GwfU+75nTNCFXLqhgpLjLmzJ/7v5ghpksnXdmrqbuEwiJmTvI2uIltAC9UG+kdFqKgpZ6/d9Q/0dXO6GEMQFQXTDjskViTWxu9UFiUJoLr300qwmq5Pbc5QNARDKgOAjTrB0gxDC7oYn5xe6JhQpp1AXdzMMPYdkFbf4Mccck28iMikiDdpO+S0+TM6uCOR+dR8zok319m/yQkK5K5FAdXBrUk/tWRYWTeUs7WMSVhYhGhYAyDxyC1PlQH5ghQQ01RuRhAW8veM2JfbELY7EsA03mCCwSKa2r7qG1YtNaDcLATaH6FkMORC7rjxwa8IHyWuyn9b6ioe2IHM2pX5kEagPWoBpM5cquG3HLVRNuLAdRw+xfViqj0VBHblkUxZYysjekG2HzrtwoHgw9EOYOQi8lZzW2aQ4bmEoxe6Uv6n+xc1vccmub7zxxtyH9Xfkkm02jUXGE+3o+lBJf9c++kuTfevfJRl3quW0yGjKr0oOtQ1iqS+7q95YocwWkU39QL9z/iWMWy/FGJgZoW+/GuSyb/GM3AKBQYFAuWrPZETxQswoUwa6umSiQSCRSVfgSQb49957LysJPZFLxNWkvOmmm+YJ0KRcdYsjKsigSZ1qiAgZ2KtnMyKMSLDJguKKHCBYrsGkDihbuSsa4TPhIqFuUymkj1JIFTQx9kQuEQkTiInCxqZy1aIJWVnKdYKIn+/CsPUavabvI8MUwjJZU3LUwTV6yDhVrPzmLmDkyUReMICPWFm/lcOZEQhX62kPddNOymaCpeAgfYiqyRMRo75VYy4peDYaIKjO4FNvEzuCj5TBXaJMIpvwQJ4KESs2pTzw6Kk95dWunFVyiQTKU32QP5O28kmUPuVhu035sS/YletNy/3z8vXf2tJviKo2cCtQ9eYehE59iyKmzt5hx03l8b0mfJS9yX6Q92p9lQWpQvjcgkOlpHCzh6pbvKkc3tdP2GK5v9wd3MpWRy6Rb4slSb9GfNi2/lSuq0QM2VG5srSMGb5TZ5NstWp3FMGm+lPcLa7YuPag0mobZYeDsjWNRRa6TWSwyb4tWlrJZekfbL4ncmlRpc7GLZ4USVshrcbWpn5AzfV869W4g2IC6aCQQS47ACkeCQSGGgJUAGpIIX0mT25XA2W501ydkUobWCgVXG6rr776SNcvxcx1jYLSq+SyqDxFuaxuGPE+8mLyrJJLioONPcgNZbO4XqsDv0mnLgCeK1bZqCMmTG5CRAMRoUi66qy4lKuTSBO5LBM2dRZBKIRbGU2eJmQTHHLYmhADWJXU9H3/zl0NI4kL3EStHogg3MvGBWoc1dcEX+4O9zflCYEsdypTQimP2hYRMHkh4MiBshdyWY01rZJLxIUCSZ31jnpQ30yQ5f5tZdUG2mKZZZbJeJeJ2G8IK3yQiZ7a0/OdlLPVfQiP6l3m8mEziF5TflRIO3OLHYiTE+7ADtWbkqYebNOCqdVeqJblTnbfq7rtm8oD/yZ82tmPBUo1lm/OOefMqjrFWj0lCnIruWwqB7Jv4VVd+LCd0lZVG2bf7KksINkc1VJb+81CQ7+CX91C1DN1NsmuqnZHDW3qP0iZ55Eu5MuiFOEvmw0RU+S+bixCIKtk0EJLPhYmTfZdrX9rrG8ruazmV+xSn6/bAPXiiy/mPtzUD8o4aTGJ7A+1FORyqLVo1CcQ6AABqgAyQtWiipVVNPeOCaWkPffcM7uhKF6UkapagVCKa6Qm+u9CMorCV8gl91ZxUzWRS6TIpPLOO+/kidtqH2mkSpVU7kL2uwlHomhQcdzHbtKhUHJXIguUQJMUpVIsnqQelExxZ1Vy6VvqgYxRIUwW4rwoXr5BxVJGrlXkiauZ+74E83PFU2IoJ9XjRZq+b6JTLoqbVEij8msXsZJFmUMaqLMUq1ZyedVVV41USyjJys8NJ74OIYIBN6iFRE/k0rcfeOCBTPBNyEiqiQ8uSINFRiGE2lP5kKcSDlEll/DtqT3FEHZSzlZyyY0tLAHBlYRUWOT4ZlN+XJTU4mIHSLMwgrJbXJ7czPqFsADKXDX5f8d2sR8JLkg2QttUHu3WhE87+2HHVXLJFrm09SPto69R9VvJZVM5kEj2XUImlF9IjA18dcolBU4ZJB4AJEnfFq4BQwQX/ohWa2IndTaJcFbJpfCGdv2Hcg4/NmgMMvYUcsk2keO6sYhHAy4lvIfd8wAgl032XSV2deSyKb9il8pD4RfXjDRLCLkFmoVKUz8o3o6ywO9g2B5UjwS5HFTNFYUNBPoGAXFBlClHEFEQJZOWmDNkgvJUNr34bxODGD0DpYmGC9SEgcgY+E1WJhZk0MRhVd4TuURkbcyxKcKEjmxQ0pA6E588TSAllZW+icdEKX8DPwLITYpYIAFiw2x0QGT9LXaTAoI0iolSDxtaymSn3kgZdUR+CKMJHTGEEUWRcoT0id0zISNgFFmTl4nb38qPnFTdqTCs+76ywJlL1mRermk00ZhEOyWXJi/lNVGJV0Vu5c11jaA1iwoWAAAgAElEQVRToRFt5MsCQv5NyiVckBYkxvfF55nYxfLJnwIGN9+gZtoE00SekLie2lPcYiflbCWX7As55zKl/lLaStxnU34WCNpB3J1wB7ZHLS7kkuIJG6qcZ1tj4ODAntgBAsP2xdgil03lYZ9N+LSzH3ZWJZf6l8UUNRWu7EYZkHp10Vf0w6ZywFkeFm5CUtRjk002aYy5ZL/Fba/e6myhUhY2hVzXqW3GgDqb5Gqv2l1ZKDb1nxK2oF2LQlrIpcVu01ik7PL0R1mQPX2b/TbZd9nIpV6t5LJ4eOryq9olYk/ZpnRSZC1mfE98cFM/gAGCXhbefTOyd08uQS67py2iJIFAvyKANHEBc8VKFD2ECsEsSawdMmmnJnctYkUZlMQhGoxNxN4rE7UNCFx4PZFL7it/qJ8mMcTPZgDKiImQulN10fsmF5R/L8kEggypi3Jx31GYTBjKYQMLt5TJWjJJmzi5u6lXyEGZ6PxOdeRyFb/I9WZSMcGU30wW3vM9aqIgfgkpQQpa3WMmtrrv+464LJuf1NcfO3G5HevIpQWAyayqcCIuvks1lZBy9TXBIrylHbWhCY8ChRSVenun6hZHRhH14npFNGFlM1AJYfCOtoHZK6+8UkueLAgoyT21Z9ld3lM5W88ThBmckcNSHqSJgt1Ub+5JoQgWM5KyI9DFZqlo8rTzHPFqTTCmAiMKyi1cQ9gEhaypPPCrI5dFcWuyn9b6wpqNS0I+YKs99UVtRlGXFyJVhwtixy60uz7CZt59993sii1qdLU/IYcWn5INZfp9WTBRE/V3Kn1d0h/rbBLBrdqdd9v1HzHHQkGMTyWmWL8o4SJNY1GxKbbBjvV34TIlBrjOvqv1KDiVI8q0O5uqy6/aTsYIJNz4Jdl4ZdOYcbCpH1jQiHMvanotoIP4H4NcDuLGi6IHAmOCAKJm0m09wxIR47KljlRjHn0LcTPBOq6k9dgdA7/EPdlp8g411B8TmmN1kCS7mJuSHaQmVHGZ1ZuF2n1Tnagg8q5Lvm2zAHdfa6KYUmrtym09GombkCrIpUo9bUpN30dqkBzvVxXPTvAzkVNrueSQhtbblMTqwcdNINrNn6b6l+9pD+qsUAnvlSRm0yYLxBKJa8WhCdNO2nN0yolEOCkA2afUVsvTLj9KtnZqtS/twwVMSZJfa0JO9AkLBd9CyrhZqcFSu/K0a8tO7Yd9UuS5xX3fe88++2wmelRQCwf/3a4ck0wySVZ8xWsil+2SPCmzrf2BUmrhVo4kas2jJ5tsfb7T+teVtd1YZEGEGLfegNNk3z31t6b8qu8hohYr1HR9pKSmcY3aKUYUSR2KKcjlUGzVqFMg0AECJiIub4oM12ykwYVAmcjFgUYafQSEN1hkUQWbVCQkWUydBRc1DLEUW0h9Gg4JLly+yKDwAQSqLoVNdmYNlFdxuzwRnSzUOsu1u54Kctld7RGlCQT6FQErbcrbQFzB2K8VHYIfE/MoLtKGjUijjwC1krucgsSd2ZTEz4r3cx6pRZm4zeGSuH25eYW/VFW51vqHTXZmEdzvwhuGct8NctmZLcRTgUAgEAgEAoFAIBAIBAIdIBDksgOQ4pFAIBAIBAKBQCAQCAQCgc4QCHLZGU7xVCAQCAQCgUAgEAgEAoFABwgEuewApHgkEAgEAoFAIBAIBAKBQKAzBDoil47hEMjruAZHZjiiwBlUrfeKdvbJeCoQCAQCgUAgEAgEAoFAYKgi0BG5dINFOcPOOVuOI3DG1NJLL53Pp4sUCAQCgUAgEAgEAoFAIBAIQKBHculgUKfk2zLvwFpngTmx3i0D1bs9A85AIBAIBAKBQCAQCAQCgUCgR3LpZgYHo7r9wQ0XbgFwc4MbC1zp1NONDwFxIBAIBAKBQCAQCAQCgcDwQaBHcgmK6h2gBRrXijlQNlIgEAgEAoFAIBAIBAKBQCBQEOiIXLomboUVVkhOlS/37+6zzz6NV0AFvIFAIBAIBAKBQCAQCAQCwxOBtuRy/vnnTyNGjGhE5s9//vOQvRdzeJpD1DoQCAQCgUAgEAgEAoExQ6Atuezp3kt3rb700ktjVoJ4OxAIBAKBQCAQCAQCgUBgyCDQllwec8wxaZxxxmms7O67757efvvtIQNGVCQQCAQCgUAgEAgEAoFAYMwQ6Cjmctppp611jz/33HNj9vV4OxAIBAKBQCAQCAQCgUBgSCHQEbn84x//mGzqaU3hFh9SthCVCQQCgUAgEAgEAoFAYIwR6Ihcnn766Wn88cfPHxtvvPHSjDPOmM+8XG655eKGnjFugsggEAgEAoFAIBAIBAKBoYNAR+Sytbr77rtvPuPSH+ddRgoEAoFAIBAIBAKBQCAQCAQg0BG53HrrrUcqlzb4UCwnm2yytNlmm6X7778/kAwEAoFAIBAIBAKBQCAQCAQyAh2Ry7qYS7vEl1pqqTjnMgwpEAgEAoFAIBAIBAKBQGAkAh2Ry4022ijHWkofffRRevzxx/N943EMUVhSIBAIBAKBQCAQCAQCgUAVgbbkcqqppkqHHnpo8vf111+frr322rTBBhukqaeeOs0yyyxpvfXWS6+99logGggEAoFAIBAIBAKBQCAQCGQE2pLLq6++OjnjsqT3338/ffKTnxz5/3EUUVhRIBAIBAKBQCAQCAQCgUDHyqVYy3fffTcddNBByW08E000UbrnnnvSrbfeml588cX894cffhiIBgKBQCAQCAQCgUAgEAgEAj0rl+4W98ducVdBLr744mnRRRdN//3vfwO+QCAQCAQCgUAgEAgEAoFAYBQE2rrFEUvq5fe///109NFHpyWXXDJ99atfDRgDgUAgEAgEAoFAIBAIBAKBWgR6JJd2h3/wwQfJ+ZYjRoxI4i5LWnHFFWNDTxhWIBAIBAKBQCAQCAQCgcBIBNqSyz/84Q+ZUDalIJdhSYFAIBAIBAKBQCAQCAQCVQQ6OucyIAsEAoFAIBAIBAKBQCAQCAQ6QSDIZScoxTOBQCAQCAQCgUAgEAgEAh0hEOSyI5jioUAgEAgEAoFAIBAIBAKBThAIctkJSvFMIBAIBAKBQCAQCAQCgUBHCHRELs8555z0mc98ZpQM//3vf6ezzjor/f73v+/oY/FQIBAIBAKBQCAQCAQCgcDQRqAjcumsy0984hO1SDiqaO21105PPPHE0EYqahcIBAKBQCAQCAQCgUAg0CMCHZHLX/3qV/nqxx/84Af5dp4jjzwy3zF+3nnn5QPWzzzzzHTyySf3+LF4IBAYSghccMEFuR8MVHrooYfSl7/85YH6fBru3wf8cMcg6h99cCDHoOiD3TEG7b///qPMQz2Sy0996lPZ7f3888+nb3/72zmDc889N80xxxxp+eWXT9dff336xS9+ke8fjxQIDCcEPlz1iOFU3ahrIBAIdCMCH32YPnH17gNSsu9+97vp0ksvHZBvl48OdBni+/U20CO51IAOUx933HHTiy++mG/omW666dLbb7+drrnmmgRYiuZtt902oAYWHw8E+huBIJf9jXh8LxAIBEZBIMjlgBLcIJdjQC7dKf7DH/4wfe5zn8t2/eqrr6ajjjoqLbzwwmm22WZLG264Yfrwww+j1wcCwwqBIJfDqrmjsoFAdyIQ5DLI5QCqx03kuiPlsvSoCSaYIH3605/O5DJSIDDcEQhyOdwtIOofCHQBAkEug1wOVnK51lprpc022yxNOumkH+tJK6ywQi3RXGyxxdI///nPYb2DPDAY2EF3jTXWSL/+9a9z+MbYSkEuxxaykW8gEAh0jECQyyCXg5Vc2tBjY8+77777Mfe3DT7OumxNl19+eY7BPP744zvuH0PtwcBgYFv0rrvuSuutt156/PHHx1pBglyONWgj40AgEOgUgSCXQS4HI7l01Mqdd96Z7r333rTlllt2ZO51xOpLX/pSOuWUU9KEE06Ynn322bTOOuuk6aefPv8bRfQ///lP2mqrrfJh7ccee2wmslNMMUW6/fbb07zzzpsmmWSSdO211yZb3n/729+mZ555JsnzhRdeSBtssEF6/fXXa8v2s5/9LJ/ROfvss2divOeee+ZvHXzwwelb3/pWfofCteOOO6Yf/ehHafzxx08zzTRTeuutt9Kf/vSntPTSS2f1yzeee+652m/U1e3CCy/MBPvEE09MJ5xwQvrqV7+a3nvvvfxdO+y/853vpB122CGNN9546bHHHkvrr79+2nbbbdPiiy+epp122hx+AMcjjujdjmRtpMw2XcHxiiuuyGVeddVVc/1gcccdd6Rdd901rbzyyhkPm7WQsI022ijNOuusfYr/b37zm/TUU0+lueaaK7cRwqdsfYl/Xd0KudTWjsqaZppp0iuvvJJtmO30Bf5BLjsaDuKhQCAQGJsIBLkMcjkYyaU+ceWVV+bNPIjISy+9NLKbPPLII7UbeerI5amnnpqJ1AEHHJAJ5WWXXZYWWGCBNOWUU6Yf//jH+d9N+meffXY+M9PxRgimA9qRSgQF+VxooYUy6fMH8TrjjDPSRRddlE477bTa7isf5NV5nAceeGDO86STTspndS666KL5Hbvht9566/z7ZJNNlvbaa698tBIitttuu40khIcffnjtN+rq9s1vfjOTywcffDDntfvuu+ejnJDHRRZZJP8Gg4svvjgTwEMOOSTNP//8+Zk99tgjcavLo5TRh9UDTq1p3333HakgI5ACbOUJx9deey0/jpBfcskl+Vgp/77uuutmrP07AqjuyOjTTz/dp/g7gP/RRx/NeCPa2uq+++7rU/zr6obcI7Kbb755JrZsTLtaYKy++up50dJb/FtxD3I5NmfMyDsQCAQ6QiDIZZDLwUoum27oofxVyWbpCHXksqhlVNDPf/7zWZk7/fTTM3lD7uw8/8lPfpIVxJ/+9KeZXFE2ESIECzFFgiiAiOWaa66ZCadjkL72ta9lElqXkEtEwu72TTfdNIkfRTSayCXCjKA5IJsqussuu2TiSsnceeeda79RVzeEEoGkBPrjGQl5/N73vpfeeOONXIcvfvGLacEFF8zfmHrqqdM888yTySHVlqKqviUhvkhoa0JGC4lU9iWWWCKttNJKIx+jhF511VUZJ+krX/lKPlIKMS//hqBSYIUy9CX+bGfjjTfOh01Tkf/1r3+lq6++us/wr6sbZfyXv/xlJpdszBmtf//737MiPN9882Vbm3HGGXuNf5DLjqa6eCgQCAT6E4Egl0EuByu5RMzsFG9NXKtIV2tqJZcUQG5Krl+TPBc3cunmH0QKSUQgfQe5PO644/L/zzzzzJlcIkBuCKqSS8cj+TZiRQksB7y3lgW5dB4n8ubIJO5tKl2VXN59991Z4fLv5dnzzz8/IZqIaDtyWVc38anUQeTy61//epp44omzglkSEk2x5LJFrJdbbrl82xFyieBxT3vn5ptv/hi5lK+6tiZ5uDlJQi4RKMpkSUsttVQ69NBDR76LZM0999xpv/32G6mMCjdAgo855pg+xR+5XG211TLBQ/SQYG3SV/jX1Q2ubAW51I5c/hYKkhAH9aZ29gZ/7vTWsJCv/Pg3/TmFxLcCgUAgEBgVgSCXQS4HK7nsbX8u5PKcc87J5IVbm1qFxCFrVKUbb7wxLbvssjmekxrHZYpAik/shFwiaIgnouJe8+233762mHXkcqeddkrKtuKKK6Y555wzHX300Xk3/OiQSx+tqxvyi1xywyNXyK+4T0QVkfX9b3zjGzn+0I1H/l3oQTtySW1DSlsTBbTEg1bJpbwmn3zyhODBebvttkt//etfM2ml4nKLCxdAornRuYm5rPsS/zpyedZZZ/UZ/sh9a9022WSTXDfkEpF0cgFcttlmm9zm2p/t9Bb/UC57OxLE84FAIDDWEQhyGeRyMJFLbmjqzq233prVt7o7lJuOIkIuvYfIiFPk9hU/uPfee+dd5xRH77o+UhznOOOMk0aMGJE3mbzzzjsdkRtKnff+97//JcfO1LnndepWckk99W0xeV/4whey4qdMW2yxRSO5FFOpzFzkdamubvKHAeLIxc6lDUOkmtp63XXXZXVS4iKH90033dSWXHYySCFRNkAhVkg7hXKVVVbJ4Qc2scCZiopMw95mGP9WSDBMOiGXneJfJZcINOVSGfsS/7q6UcqptzbyHHbYYRl7f4QrsIm+wD9iLjuxyHgmEAgExioCQS6DXA4mcolwifmzs5kKh/y0JqSu7iiidh2J67e4KMtz4gypZ8W121NHFHNZ3Obc7MpKlWpN8lT+pjTVVFM1ktK6d7jUKYHVhBiKJZTq6laeRdoQZ+7hkiiLTz75ZI5/5Kqu/tYTBqPzu53wdoZzB5fk32aZZZb08MMPd3zL0kDg31Mb19WtihHCzT3+5ptv9hn+QS5HxwrjnUAgEOhTBIJcBrkcTOSyavxiFSlwJZnoxehRhJqOAOrTztOSGXJjUwZSJlGkqhtYyuN2nztCqa8SN6qjlKoJYbzhhhv66hODIp+BwL+/2rg3DRDksjdoxbOBQCAwVhAIchnkcrCSSy5Gu53tzLbbWswagtm0W3ysdKBKpv2h8o3tOgzm/AP//2u9IJeD2Yqj7IHAEEEgyGWQy8FKLm3AEbtGpXPIuSSeUpxip67sIdKNoxqBwEgEglyGMQQCgcCAIxDkMsjlYCWXXJI///nP02yzzZbj8uww5hqNFAgMZwSCXA7n1o+6BwJdgkCQyyCXg41c2i1e3SXOFS7Zof3RRx/lOMdyeHeXdLMoRiDQbwgEuew3qONDgUAg0IRAkMsgl4ONXDqIuu4IomLjzm7s7W7xGCECgaGCAPW+eoNSf9fLBQSXduGg0l84DHT91XOgyxDfjz4wnMeA6IPdOwaNWGCBBT7qZDJw1qWr+yiVrix0R3WkQGA4IxDkcnhP7DGxde/E1l/jUpD7gR0Dog92bx/siFw6CH2ZZZbJd047V/LYY4/NG3rckR0pEBiuCAS5HNiJZaAn9pjYundi668xaaBt0Pcvu/yKtNAeZ/dXleM7gwiBe4/fIf33jdfGaomb+kBH5NLd1eIsXavnAG6HhjuOxi0wyGakQGA4IhDkMshlN5CL4ewWDfyDXA7HuafTOnc1ubSJhwvcTTI6suT6Pv/tnnBX6UUKBIYjAkEug1wGuRneNtAN7R/K5XCcfTqrc1eTS1W47bbb0gQTTJBeeeWV9N577+UzLx1J5MYad25HCgSGIwJBLoc3sQi3eLjFg1wOx5F/8NS568nlfPPNl9VK92N/4hOfSK+++mp2jV900UW1KC+22GLpn//8Z3riiScGTyuMxZK6Q3yKKaZIwgtGJ0066aRpoYUWyne9R+oeBIJcBrnsBnIRbvFLB2xQ6Ib2D+VywJq/6z/c9eSyIDj11FOnqaaaKj3wwANtQb388suz2nn88cd3Pfj9UUDEHEFfd911az+3++675+s0DzzwwEayfsghh6Qll1yyP4ob3+gQgSCXQS67gVwEuQxyGRt6Ohy0h9ljXU8u559//nTwwQdnYim9/vrr6ZRTTkmXXHJJbVPVkUtKJ9Vz9tlnzy71PffcM916661p//33T8svv3zO57rrrstxnJ4df/zx00wzzZTd7ibxpZdeOr399ttpgw02yKroCSeckM8Y5KZXtuuvv762LKXs7kGXqH877rhjVmJby2Ozkrxan33jjTdyfZFrRzHZJb/22mvn8u2yyy5puummS+ecc07eUf+d73wn7bDDDmm88cZLjz32WFp//fXzt5rI5YILLphOOumkNGLEiEzG5Q+bcccdNz3++ONpo402Sp4p5BIRXWKJJdKaa66Zsdh4443zu1dffXU69NBDM3Z1OJ955plprrnmyhuz9tprr0z+e5PUX5nmnHPO/H3Xfsrv6KOPznmyDZu7lIlSq30mm2yyXJ9NN900UV+dMvDuu+9mFff2229P8847b5pkkknStddem+3AvyunsAshGFtuuWV65plnaot59tln57aknm+44YbJUVlbbbVVcjbrU089lcvGTm06Q+rr2qo39a97NshlkMsgl8PbBrqh/UO5HNORfOi+3/Xk0uSPPLz44ouZzM0wwwy5NZZbbrnaQ9TryKWNPwiGqyMpdEjGrrvumjcEbb/99plwIllLLbVUuuCCCzIxQYIOOuigTJZ22223kSTy7rvvzv+OaDnIffHFF0+LLLJIzqM1cdEfeeSRadFFF80/cU1vvfXWuQyt5fH9umcRSWd77rTTTpk0/fGPf8wxqDPPPHMmf5///OfThRdemF3XSNNll12WLr744nTFFVdkUoiQNZFLh9SfeOKJWbmEzQ033JDcjIQkKSNChkzL5/zzz0+bbbZZJmuPPvpouuOOO/Lv7ntXp2222Sbts88+o9RL2fbYY4+02mqr5Tqoy1prrTUSKmR5gQUW+Bh0iBPCXJI2RaKRTMROQnpPPvnk3F6PPPJILq98lWGcccbJ7Quv5557LuPjWe2t7WHKrhBBpBB2yDFSaIGh7bXn6quvXtvz3Xfvz2mnnZbtx+JjjTXWyG0DG+0IV+ST7da11ZgOKUEuhzexYD/dQC5CuQzlMpTLMR3Nh+b7XU0uHTlEFTNpIz/SdtttlxUzChsy1JqayCXiddRRR2VyhIRQCKmPlEuKpj8rr7xyOvXUUzNZofghLi+88EJWCBEJSiYFbdZZZ83kSkIwESRnb/aGXLaWB6mpI5eIF+KG1CKOhfjVEZYZZ5wxq4oIHPKlzIhyO7f4T37yk6x0Uk2vvPLK9LWvfS1XY999980qIEKHRFIoqcVHHHFEVkgRbgRNgiHiRsFrrZd2Q3Ypqddcc03Og/JY0hZbbDEKuVRXKmKVXD788MNp7733Hvlv6oc8LrzwwvnffBeGThdAHpFqdfFdxN05qcj+9NNPn8uA8Ks3PNmBv59//vmsgLI7mJW8W9u1Hblkmw899FBWcR38b0EU5LLvB8/hTqyCXAa57oY+EMpl349tQyXHriaX5Sgik/4666yTSQn3LdKAtCCdnZJLxAbZ4sbkPkVqqHsPPvhguu+++/K/F3JZnqXWIZpISyGXFLSJJ544v1cS96wy9kQukabNN988q2yt5fFvVXJZnr3//vtzeZXNpqbzzjsvTT755CMJC9UU+fM31ZF7nUJK2fVsp+QSAaTeFZWVqxiJRszkDwffp+5S65DY6k1J6sPl31qvb37zm5l0UlnnmWeerBZqy5J8gyu6mqil1TNM6xYMyOVxxx03srzqTlEWCkDNRf5nmWWWrFSqQ3kW0UMu2dBEE000klzeeeedORTAYkISBkEFrUtVcrnffvtlF3tRLim0bOH000/PbnnfL+SytBWltDeJi96f1hTXPw5f1SjIZZDLIJe9GUXj2f5GoKvJJTAQQIqc9NFHH2UFjYt8pZVWqsWqEBExlEgY0kBVayU9V111VVYwuTQpg1y+VEju03bkkoKGQHiW2ol0LrvsspmMtKY55pgju3dXXHHFrKQhob5TRy65jOuepdTee++9uVzIH+KEVCNt1FckEJEU24c0OaIJAT733HNz2bjfe1IuudlhACukHZGEO6WUIsstDiexkhRCZfGber3//vv5WaqwPFpx9hxSRTFWNnlVFUGKotjUarrnnnuyQl1SlVwiv9ofaa4jlwcccED+d98VV4swclH3RC5hj9RSrNVD3bRxXYIzd/vOO++c60udbCKXytraVsildqSiIsUWUb5FpbeAsnlKvGe7Ew/CLR5u8W4gF+EWH74LnLihp7/p2uD6XteTS+qSmD4bVkzCFCnkpk4pBD0iglRQw+aee+60yiqrjEIuqVuUNJO5DSFIq9hKaqT36sgldzm3OGJY4jLFLN544405prApifej+CENyk8xbSWXyrPCCivk2MDWZ8WYUsfeeeed5HvIE1cvDGxIefPNN3MMJsKCUFNVJRuBuH25rilrNpfUJYqtTUDIFxK/6qqrZgJPcUOiEWSKYIkrVFdESlwiUgg3ap/3uNWr5FK9qK6UPhhLNsI07UxvwrC0KTe4+iBW8qwjl4i2TT8ffPBBevnll/NGKKScGx8xbVIuucwPO+ywjLE/6tx0SD8iKOZW3bWr77SSS8SecunvurYS1kEFFlIgLlO9LBZeeumlHO5xyy235HI3pSCXQS6DXA5vG+iG9g+3+OAifP1Z2q4nl2MTDORAfCIXLOL32c9+Np+j2UlCAhE+JFdsYtnlXX33pptuysof8oA0dJLqnkUYuY8petWNQ3XPKosbjSiKYgcLCUfyKLnVhICKDUTgKa/esVOeO5lCW7dJqfq+71NG//a3v7WtGpy5oR0jVafwdoJLb56h3CLdQgSUEfnrqS4lf0Sc2un9dpghoBNOOGEmkJ2k3thAJ/kFuRzexIKNdAO5COUylMvY0NPJiD38nulacnnXXXdlBa0pUfo6JYJju1mRODGArUks5z/+8Y+x/fmO8ueSRoaqCTmu2xTVUYbD4KFuxizIZZDLIJfD2wa6of1DuRwGE+FoVrFrySXS40iZpsQNSZmKFAgMRwSCXA5vYhHKZSi3QS6H48g/eOrcteSyFULuWmdKcg1zc0YKBIYzAkEug1x2A7kIt3i4xcMtPpxnoua6dy25tAPbxg+xbb/73e/yZoyiZNpQ43zGSIHAcEUgyGWQyyCXw9sGuqH9wy0+XGegnuvdteTSodY2mtjN7eByyaHUU045Zd6cUQ777rmK8UQgMPQQCHI5vIlFuMXDLR7kcuiN60OpRl1LLk2ertJznI3NPTbGuBnGcTTOTbTZwv3NkQKB4YhAkMsgl91ALsItPrzd4sO5/WOB170LvBELLLDA/x1+WJNMnuXaRzfOOC7INYtuy3HAdTkTcDgSi6hzIOAwe/e6RwoEAoFAIBAIBIYjAq+88kq+pKQ19Ugu3ZjioHMHjztP0K0rDsl2u0mQy+FoSlHngsBAK5fx/T/lO+kHMkUbDGwbBP7DGxPPqmEAABbESURBVH99P2ygO22gR3LZbuAOcjmQ01p8e6ARiEGtOwe1/rSLsIHhbQPR/gPb/kEuu5dctyWXu+66a75erykde+yx+eq9SIHAcEQgJpaBnVgGGv+Y2Lp3Yuuv8WigbXC4fz/6YPf2wbbksr86aHwnEBiMCAz3gX241z8mtu6d2PprPBnufWCg6x99sHv7YJDL/hqF4jtDDoEtt9wynXbaaQNWr/j+wOKv4aMNBrYNAv/hjX/0we4dg4JcDhg1iA8HAoFAIBAIBAKBQCAw9BAYLXLprEtbz3feeef01ltvDT1UokaBQCAQCAQCgUAgEAgEAqOFwGiRS9c+LrHEEnEU0WhBHi8NNQRsept44onzUV2DNX3qU59Kk002WXrhhRc+VoVJJ500/ec//8k3ctWlprrXvTcYcJp11lnzeb4lTTDBBOmjjz5Kb7/9dmPTTjHFFOnll1/+2O9N79U92y02o81c79tal57K3Ju27smeBhILbSa1CiY9lbk3bd2JPfU3Bp/97GfT+++/n959992Rn+5Nv66Wt6l+vbGR/q6/70011VTppZde+tinZ5hhhvTMM8901K/LQ0249cZGelP/unJ7f7rppkv//ve/R9pyJ2NvXT8fXTtQhiCXvWnJeDYQaEFg3333TSussEImHzripptump566qlBhZPFohu3XO366U9/Ou2///7pueeeS2effXYmleOPP3669tpr00EHHfSxetXV3fN17w0GnMTv+eNa209+8pPpnHPOSdNOO22us/N+11133Y+RbO2uXm+++WYyQcPnpptuqn1v+eWXH+VZmHZDUvYLL7wwt7309NNP53ON6+pXLfOMM87YcVs32UU31F8Z1B8B0O4m5Q022CBNP/30bfuABVmdjdS1dZNdNC3a+gMXhG/xxRdPP/jBD5Jbfk488cT82d7061LOJiyaMOyW8cA5uSuttFL2xC666KK5Ouutt17afvvtk8PB1evKK69MZ555Zo/jQV2dnn/++Y5tpDfjQV25S1vMM888ubyHHnpouuKKK2rbszpHNfXz0bGDqt0GueyPXhzfGJIIGHh+//vfj1TwTVAGpO22227Q1Bdx/O1vf5svRqC8/vSnP80KJqJpskWWv/CFL6SLL744LbPMMlnFlJrqTuVrfc9tXtddd11X4zTvvPNmcjjNNNNkcrnaaqvlsB+Tr+SGsqOOOipdfvnlI9v2N7/5TZ44kOkddtghv2OCrntv2223HeVZ1+d2Q9pnn33S3HPPndZaa628QFLWk08+Od1www1ty+wa4E7bus4uqvY0kDgsuOCC2e4tsNi1/mAhoU3b9YE11lij47ZusouqPfU3Bssuu2zaZJNNcv8+77zzsu32pl9X268Ji6WWWqpjGxmIcZMNzzzzzGnyyScfSS5vueWW9LOf/Syde+65uU/oz0ceeWTb8aAJN2PE2BgP6srNfoznyPCEE06Yx6tf/epXPc5RdeMYwlk3t/WmH7cllybLunMureZdexeHqPf3cBDf6yYE5ptvvnTKKaekhRdeOBfriCOOSF/84heTgXYwJf3ZCptr/5e//GW+kcsqHqEyqBoDXANLzbrvvvty1ZrqXohY9b3DDz88qyPdihMCod4mN2QRufzRj36UvvKVryR3h0vXX399uu2229IhhxwysmnvuuuuTL7/+te/JmoVcmpgr3tv1VVXHeVZ3+mGdMYZZ+QJdqKJJsrnFp911llZ+airX7XMVJFWG2lq6zq7qNrTQOOgbT/44IOsXHIPa8+6+lXLvMcee3Tc1k12UbWngcLAwu+aa67J5LI3/boTLChsndrIQI2bFnn6blEuuYep10IFzj///Ey477333rbjQRNud999d8c20tvxoLXc7Ed52fLaa6+dTjrppPTEE0/0OEfV9XMenLq5rTf9OG7oGageHd8d9Ajo3AcffHBWPKQ999wzX0c4UIPkmADKLbTffvtl5ZVqc8kll2RCddxxx+VsC7m8//778/831X288cYb5T0qmMGqW3G64IIL0iOPPJIOPPDAfJWcQd5k44rbddZZJ9cX0YDBYYcdNhJmEwcX2qOPPprmn3/+PBjffPPNte/ZBNn67EILLTQmTdZn7yI+FFsKy2KLLZYJtbYy6bQrMzdeq400tXWdXSAnxZ76rDKjkdEcc8yRfv7zn6cnn3wyk0vufmq7hUa7PoAY1tlIXVs32UXVnkaj6H3ySpVc9qZfV9uvCQt7Mzq1kYEaN+tImnZFziy4t9566zwOtBsPmnAzrnRqI70dD1rLTWmnRn/729/O45DyI8k9zVF145iFU917venHbcmliXLEiBGNBnzPPfc0Bvr3idVHJoFAFyNA6eFSQEbETh199NG5tFS6wZTElxmYuL5LHRAlsYRu6TLhUh8MfiVGrKnuVMDW9wx4BrtuxEnc2Y033pjeeeed3GQ8Mv779NNPz3F3FCyJi9iNZNW4KAqBuKZf//rX2Yuz0047ZdWPq7H1PUpo67PlmYG2FWoHxVK5y0KCa5xbrV2Z62ykqa3r7KJqTwOJwd57752WXnrpHPYhcYvrC0IF2vUBi7BO27rJLnoTZze2MKqSy97062r7NWGB6HT7eNBK0uaaa65EzbfwMZYrf1P9Svs14Wax2qmN9HY8aC03u6Wy/u9//8vjmL/1YUSx3dhbN46tueaatXNbb/pxjzGXCmslNuecc+aAb24xqgbA/F1isMaW4Ue+gUA3IyAuBSG7/fbb02WXXZbdiVSQwZK4vClUBxxwQI7PKWmbbbbJCpYBDGmiKlAhKFt2lD/++OM5Jqe17mJ+6t7rVpzU3yK6JLF3gvmFCWhP7uypp546nXrqqTnwn6uMe9+kIk6NMgAfKhdSajMUlbP1PZumWp/dbLPNusJMKO6Ua2rdbLPNluvKRSiWtK7MCIMJiarZaVs32UU3ALDKKqvkCVjcsQnZJL377rsnima7PoCQd9rWTXbRukN5IPCokkvf702/LuNBExa4Q6c2MlDjZitJMw46MWKXXXYZ2Rx2jte1tcWpP3feeWctbsSHTm2kt+NBa7lxtHLigbAk9XDJh/ZtHaeFAbUbx5SlN3ZQZ7dtyaXCcnMYGKpJBxx33HFzoQWDRwoEhisCSJi4O+nFF1/MLomB3AHa23ZArChQ1WSn+IYbbph+8Ytf5MGK94LL2IBEwTOQ2klYV3c7j+veGyw4Fbd42d1sDFR/u30RDgolIm4TiNjKgp14Pav9ssu+9b2mZ3vbXmPjeW1sYjeBqrdNJhSPpjLDaMcdd0x/+ctfOm7rJrsYG/UZnTxNxI51sWHhH//4RyZE1Kie+gAC3mlb1z07OmXt63fM4+rPjSr1pl9Xx4O6+jVh2E3jQStJE/5S3WuiT1D+6uqnn1iQWaA01ak3NtKbtq1z55f3tYuxCbGtK5cNOz2NY72xg16TS6ybcVhdGVDKcRzIpk5o1WLFEikQGM4ITDLJJGnKKafMsXdDLZk4qZTlDDwKDwJFqZOa6t76Xrtnuxkz8XfqXhQm9bWTtMSH8ew4G/Ohhx762KKi9T11bHq2W+pvY5d6UmdLqisz8rnVVluNPA+zN21d92y31J9CjUi0qok99YHetHXds91S/2o5Ou3XreNBU/16YyPdiEcpU2v9LDI333zz3B/ajXG9sZGxUf/W9ux0HOvUDnpFLoGB9b766qsj44cEc956662Z1fvDfSQWK1IgEAgMDwQczcFdPJjU2b5sGce2WEhQb4drsrnhoosuGq7Vz8fTDOc+UG344Y6FGOMHHnhglAVJt3eO/hjHGt3iX//613MAe3EHFbC+9KUvZQlYvI3dRHYaRgoEAoFAIBAIBAKBQCAQCAQg0EguyaGIJbc3//zrr7+eEeMSv+qqq/JBy1YtzlGKFAgEAoFAIBAIBAKBQCAQCLQll34UV/XlL385I8U9bku+uBxHk7hzc/XVVw8UA4FAIBAIBAKBQCAQCAQCgZEItN0tLpjbERrlZo3ylhgD5+Ihm5ECgUAgEAgEAoFAIBAIBAKBgkCP51x60OYdZ35RLO0cDVIZBhQIBAKBQHsEjJuuA33qqae67lQNY7njgd5+++0BL5vd9sKv4BSp/xFwg4w5ve7MTQKTfRZOwoiTYfq/bQbzF9uSS0czUC4plccff3w+kHP22WfPO0Xdo2zHXKRAIBAIBAKBjyPg0OIll1xy5D8KI9p444275tIJ13H643zLE044YUCaz3FOP/zhD7NoIb3xxhvp0ksvTQ6yH6i01FJL5b0Eznx0Z3wnyTxZDt620bUkV4K66emOO+7IB+2PTnI1X+steY4C+973vpcPuG9Kzt79/ve/ny91cFZ1U0IsXYbiXnHnMbYmeyxuueWWfFuLY4ciBQKdItCWXF599dU5xtJJ7TbuuA7NGWgGg7feeutjg2enH4znAoFAIBAYyghst912mUg6WP3BBx/MXh8KkMO5B+r+5Fa8nVHsfD7k0vFy/Z3E8pezUh3rZMOoG3IQKbH8yPhAJGQXuXRXtqP4OkmFXFL3XBNYkkPoCTLmT2Fko5OQS2dKP/zww3neRQZdYOKGpHbXzLphxSHYzmRtR9YdKSVPNzJVzzetltVh3AsssEA+JcbtXJECgU4QaCSXViwMWCd3W4fVzec+97nc8VzptPbaa+eB0oAZKRAIBAKBQOD/EHAVqLt93Vnt3nGkwI0ZCKZbNbjKHfPm2jjEgUKGCFjII3uUqWmmmSY/b4G/yCKL5CsojcXIi/EYeXVFHeLqCl7qlGfdi0w1lfd7772XVSe3KfE8zTfffPlWnXnmmSerlc4p9h6CgYDMO++8mdwVIYGHylhPdSt3Fbux5rDDDsvzgPc9y22KmFxwwQX5VhBkxZ3ks8wyy8fKULWPslm0SpKoceYV9ZCPA6p9W/6u1kT4rr/++vx7UfPcqgMvSh0lT7nk4yzSf/3rXxlXyf3p8FI2bXLNNdeMgpNj9eCAvDk437d9p64MyJij+NxuZA6ceeaZs+u4jlzaDOuMaO3pMhKLD0qnC0rcJ+1b6iW5hrOakEu/L7744vmfKaOuJXzyySfTXnvtlZVJ9uYKz6233jqtv/766aCDDkrTTz/9SHKJ4O622275ek9XODvw36Hfyn7jjTfmBRB7amo3C5HjjjsuX3nKliIFAp0g0EguDTQM14BBMv/d736XO7j7hXVYKyOrMauySIFAIBAIBAL/F5/u+jikcaGFFqo9bN41msijSd3fk08+eR5nkR6ky7tIm4OOpVdeeSUfAYfgIT8IlasmJcRquummy9+x8fLiiy/OpA5pm3vuufORcciWa3qRWsm96chNcYsjof4bkVF+KhXCiQCX8f2ee+7J5BNBQm65j80BRVVDcim1SNdZZ52Vr0SUBzKtvJ69//77R5pIuf2tzCGeQ4glZE1ZEGME6K677spEc5xxxslkDDFUDqRNUkf1Nl8h8eJIYevKPsTbfAVfCQlGtKmjrTjtuuuueU5zgYj3kTf3x9eVQd20m7Ii/srTRC59F9GdYoopMln1HSRY+YSdWVwoO+K/ySabjEIutS1ip/6I3sQTT5ztRBt7D67wtZhZbbXVcsjahBNOOJJcwsk3LU5c3YxEatdyh7p2Vk8Lm6Z20wbsJk6IiVGuUwTausXdvoNQWvG5g1jsiBgSq14dzuAZKRAIBAKBQOD/EKBQGSeRmNZTNgoRQoDcQ7/SSitl9QrRQiB33nnnTBooSxtttFEmSogTwobQIUlctcgGculZRAlBseh3/zsC4F0ExJ3HVFO/u/MeuSwXX1RjLpULAZWvK36pWWeccUYmKkiJu6eRUyqmuDvlpZwhNGIkDz/88Bx/T71bddVV05FHHpmvxLQB5Omnn07nnntuJs7VW52KuuuyDsocooNAS/JGlKiNiKL5h3ppzkGuufMRLfMPpVasILcxAix0C7bqgVx7btttt80xlEjWyiuvnMtlPqvDieJX3OJIX10ZkGcksBzHp23ctd5ELv/2t79lRdG99NpIGAAiRzV+5JFH8kLCb1zysK2muphLBBHOFiadkEu2oe0RZm1AuTavy9tvlFk2i4A3tZv2QaDrbDr6fiBQh0BbcskNYaUoWaHqUOR0bpo///nPuZNHCgQCgUAgEPj/CCAPSJ3x0TgpIYWIJ1cyla3EX5qweYXs1kWCEMaiRCF13LuIELcld6p7vY3FyGWJpyvjNFc00khRQ2iQtRlmmOFj5NKFGC+//HJ+riiXp556aiaR1EhkVkL+5KEOiCNXuLnAtyizVDbkEtlTDkoeAoP0ID/IKKWzEMYDDjggUWxLggeFkMom5Kp68xtyKT9EFrlE2kqCj3pSSbmKvXfhhRdmckmlRH4Qy+rOZy52RM7GVO5ziRu6DqcquaQy1pVBjCoyifRyU5fYyiZyWdzONngJWUDqLBQQNm5q5bZz3yKiNVXd4rAk7lg0wAERRaxL/nARt9qqXLIVixjPwU1CtBFv5LKQWgpzU7uxB+ECQS5jpOsUgR6PIiKhW+25rceKWpylFZPOOlzvF+4U3HguEAgEhh8CZZJHskzKlCIuUaQEiTOWUhYRRUqR37k9EarekEvEC0kTp4jMcpkiqr7LvYtkIBXcnsZw5A/5eO211z5GLrm0uWgpcpRP7mAEjdp2880357hJqhbiiJBRuqiCTeSSUsmFTyVFXri8W+P13MmMsCLKCLh4SWRRPZBL3xDfqS5HHXVU+u53v5vd/8gh1a+OXMIVeVd2ZUCMzVGwUP5C2lkk0laHk7hXyiX1Fh7eqysDQo0YitOkvtqP0EQulcGtdrBgB+IW4UHFpFgWMk+5bkcu/WYHOAzUdf/998+Ko/JZiMgLfq3kUpgDou9ZCwaEGNHWtmI1YSWmElmtazffsaDopg1pw29UGXw17pFcDr4qRYkDgUAgEBg4BKiR3LCOoimJkoa0UCiRABO2+EbJTul11lknEzcTfSFBCAg1iXLJbY04lePfqFHcydRQiQsdUeAmLwqYfJFLJGKmmWaqJZeUL0SKe7XkJV8EU0wfJQupLKm47EvcfVEuqYdURM9S6Ch7lLGikiGHFLpqkgeiVDbnUNNggvx63gYVbu5SLkqfzSziBQu5FJdKCaZcck+rP3VNPojrMccckzfRtJLLJpyQQL8p0+67754xqytDOeqntB+ci5JZ6liOIuKGZxMS4ituUdgE5bbsSKdMUxbryGXZ6+A34o52Li75EifpNye4CB1ALimh5SgioQvUaTYgKQ/VGllEKIuSinTWtRs113dspEKIIwUCnSAQ5LITlOKZQCAQCAR6iYAJnmJk003rAeEUJuofwiXmrjcJ6UEuf/zjH2fiRMFEyEqiviEQ3N+9SRRUCmLrCSDqIbbxscce6zhP9bMJB+lrdx4jElgwqjvEW/l9+9lnn22bT7Wevu0dRLjdwd9NOCGzwgOUpxzPU1cG6jM11Xd6SjD0PFsoycLB5iTne1J3RzcpKyJdtYG6vCaZZJJ8IoE4z+J1FAuLGFNf/Vtdu5WwC+Rde0YKBDpBIMhlJyjFM4FAIBAIdAkCVXLJdRxp8CFQYkWVnKoolnIgkv0TNugWBbq1DIg2BZsbfYstthiIIsY3BykCQS4HacNFsQOBQGB4ImBDB9evTSlxqPXgtAFqoVAHxy3VucP7s1YUZuEadapk2WgkXpNCHikQ6BSBIJedIhXPBQKBQCAQCAQCgUAgEAj0iECQyx4higcCgUAgEAgEAoFAIBAIBDpFIMhlp0jFc4FAIBAIBAKBQCAQCAQCPSLw/wARh2CDxq5w8QAAAABJRU5ErkJggg==", - "text/plain": [ - "\n", - "\n", - "If you see this message, it means the renderer has not been properly enabled\n", - "for the frontend that you are using. For more information, see\n", - "https://altair-viz.github.io/user_guide/troubleshooting.html\n" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker_2.cumulative_num_comparisons_from_blocking_rules_chart(\n", - " list(blocking_rules_2.values())\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "cbf0c31c-31dc-4b35-bcec-610b823aa979", - "metadata": {}, - "source": [ - "The plan of attack for Monday:\n", - "\n", - "* Begin testing the blocking rules and combinations of the blocking rules. Ideas:\n", - " * Match comp_num_clean OR name_unusual_tokens\n", - " * Is OR functionally different to the ANY rule splink uses for separate rules?\n", - " * Match comp_num_clean OR name_unusual_tokens OR postcode\n", - "* Find some way to evaluate the quality of my choices\n", - "* Think about any obvious wins that might improve the match process\n", - "* Bring in some other datasets (we can enumerate them while stuff runs)\n", - "\n", - "> More generally, we can often specify multiple blocking rules such that it becomes highly implausible that a true match would not meet at least one of these blocking critera. This is the recommended approach in Splink. Generally we would recommend between about 3 and 10, though even more is possible." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "84090034-4661-4810-9946-afce3a11bd9c", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.16 64-bit ('company_matching': conda)", - "language": "python", - "name": "python_defaultSpec_1687520767704" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16-final" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 6584bc4..5bcb2ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "company-matching-framework" +name = "matchbox" version = "0.1.0" description = "A framework for orchestrating and comparing various company matching methodologies." authors = [{ name = "DDaTDataScienceTeam" }] @@ -33,41 +33,16 @@ dev-dependencies = [ "ruff>=0.6.8", "docker>=7.1.0", ] +package = true [tool.ruff] -# Ruff defaults mostly taken from https://docs.astral.sh/ruff/configuration/ exclude = [ - ".bzr", - ".direnv", - ".eggs", - ".git", - ".git-rewrite", - ".hg", - ".ipynb_checkpoints", - ".mypy_cache", - ".nox", - ".pants.d", - ".pyenv", - ".pytest_cache", - ".pytype", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - ".vscode", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "site-packages", - "venv", "*.ipynb" ] line-length = 88 indent-width = 4 -target-version = "py39" +target-version = "py311" +src = ["."] [tool.ruff.lint] select = [ @@ -96,7 +71,7 @@ line-ending = "auto" [tool.pytest.ini_options] testpaths = ["test"] pythonpath = ["."] -addopts = "-s -vv --cov=cmf test/ --log-disable=pg_bulk_ingest" +addopts = "-s -vv --cov=matchbox test/ --log-disable=pg_bulk_ingest" log_cli = false log_cli_level = "INFO" log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)" diff --git a/references/README_aspitational.md b/references/README_aspitational.md index 7b8b623..4ba5037 100644 --- a/references/README_aspitational.md +++ b/references/README_aspitational.md @@ -9,10 +9,10 @@ A match orchestration framework to allow the comparison, validation, and orchest A quick overview of where we're aiming: ```python -import cmf +import matchbox -from cmf import clean -from cmf.helpers import ( +from matchbox import clean +from matchbox.helpers import ( selector, selectors, cleaner, @@ -21,8 +21,8 @@ from cmf.helpers import ( comparisons ) -from cmf.dedupers import Naive -from cmf.linkers import CMS +from matchbox.dedupers import Naive +from matchbox.linkers import CMS # Select and query the data @@ -213,7 +213,7 @@ pip install company-matching-framework Lots of functions in the Framework will require a dictionary be passed to a `select` argument. We provide `selector` and `selectors` to aid with this creation. ```python -import cmf +import matchbox ch_selector = cmf.selector( table="companieshouse.companies", @@ -268,7 +268,7 @@ from ``` ```python -import cmf +import matchbox cmf.query( select={ @@ -299,7 +299,7 @@ from ``` ```python -import cmf +import matchbox cmf.query( select=ch_dh_selector, @@ -335,7 +335,7 @@ group by ``` ```python -import cmf +import matchbox exp_selector = cmf.selector( table="hmrc.trade__exporters", @@ -373,7 +373,7 @@ from ``` ```python -import cmf +import matchbox cmf.query( select={ @@ -396,7 +396,7 @@ We want to clean the company name in `data.data_hub_statistics` so we can left j We offer several cleaning functions for fields found in lots of datasets. ```python -from cmf import clean +from matchbox import clean clean.company_name(df, input_column="company_name") clean.postcode(df, input_column="postcode") @@ -411,7 +411,7 @@ clean.postcode("data.data_hub_statistics", input_column="postcode", return_type= Our cleaning functions are all amagamations of steps of small, basic cleaning SQL. Step functions all apply to a single column and need to be wrapped in `cleaning_function` which allows them to be used locally or on the DBMS. ```python -from cmf import clean +from matchbox import clean nopunc_lower = clean.cleaning_function( clean.steps.clean_punctuation, @@ -424,7 +424,7 @@ nopunc_lower(df, input_column="company_name", return_type="pandas") Sometimes you don't need to clean a company name -- you need to clean a list of them. `cleaning_function` can handle it. ```python -from cmf import clean +from matchbox import clean nopunc_lower_array = clean.cleaning_function( clean.steps.clean_punctuation, @@ -457,7 +457,7 @@ To make this decision we need an **array strategy**. The options are: * For Splink, this would mean something like `array_intersect_level()` in the Comparison Template Library ```python -from cmf import clean +from matchbox import clean nopunc_lower_most_common = clean.cleaning_function( clean.steps.clean_punctuation, @@ -490,8 +490,8 @@ We've seen how to make cleaning functions. Let's see how to make a pipeline of t To do this we offer `cleaner` and `cleaners`. Similar to `selector(s)`, they are just ways of making dictionaries that linkers can undersand to run a pipeline of data cleaning. ```python -import cmf -from cmf import clean +import matchbox +from matchbox import clean cleaner_dh_id = cmf.cleaner( function=clean.data_hub_id, @@ -515,7 +515,7 @@ If any of your clears use a cleaning fuction whose array strategy was `array`, t One common task is building comparisons. Just like `selector` and `selectors`, `comparison` and `comparisons` can help us build a comparison object for some linkers and dedupers. Write SQL conditions using `l_column` and `r_column`. ```python -import cmf +import matchbox company_name_comparison = cmf.comparison( output_column="company_name", @@ -569,7 +569,7 @@ Because deduplication just links a dataset to itself, `dedupe_settings` can use Every deduper needs a `dedupe_run_name` name and an optional `description`. The `dedupe_run_name` is used to record the probabilities a deduper generates, and means you can either overwrite a previous run with your ever-refined methodology, or start a new one. ```python -import cmf +import matchbox data_hub_statistics_deduper = cmf.deduper( type="naive", @@ -648,7 +648,7 @@ Just like the deduper, every linker needs a `link_run_name` name and an optional Two important optional arguments here are `dedupe_threshold` and `link_threshold`. These are the values above which we consider a probability to have become truth. If you've used a deduper, the linker cannot run without a `dedupe_threshold` -- see [Evaluation -- dedupers](#dedupers) for how to choose one. `link_threshold` is only really needed to run your final pipeline. See [Entity resolution](#entity-resolution) for more details. ```python -import cmf +import matchbox data_hub_statistics_linker = cmf.linker( type="cms", @@ -731,7 +731,7 @@ To help with all the above we might want: How do we know that our data is appropriate for linking, or whether we need to do some deduping? `cmf.report` can help. ```python -import cmf.report +import matchbox.report report.data( df, # or data.data_hub_statistics select=cmf.selector( @@ -759,7 +759,7 @@ How do we know what linkers have worked well in the past? What fields can we joi Let's start with the fields that exist. ```python -import cmf.report +import matchbox.report report.fields() ``` @@ -786,7 +786,7 @@ Accuracy has yet to be determined methodologically, but some canidate ideas are: What about linkers that have worked well for fields we want to join onto? We can use the `selector` we built earlier. ```python -import cmf.report +import matchbox.report report.linkers(select=ch_dh_selector) ``` @@ -806,7 +806,7 @@ dit.data_hub__companies data_hub_id n3_li_splink 86% 0.97 And how was a specific field cleaned in a specific linker or deduper? ```python -import cmf.report +import matchbox.report report.cleaners(link_run="n3_cms_dun_and_bradstreet", field="data_hub_id") report.cleaners(dedupe_run="n4_naive_hmrc_importers", field="data_hub_id") ``` @@ -815,7 +815,7 @@ report.cleaners(dedupe_run="n4_naive_hmrc_importers", field="data_hub_id") foo@bar:~$ In n3_cms_dun_and_bradstreet, data_hub_id was cleaned with the following functions: -from cmf import clean +from matchbox import clean { "data_hub_id": { @@ -836,7 +836,7 @@ Use cmf.clean to help. We can even get guidelines for using a speficic linker. This can be helpful for tricky `linker_settings`. ```python -import cmf.report +import matchbox.report report.linkers(linker="cms") ``` @@ -857,7 +857,7 @@ dataset_cleaner: A cleaner to clean the dataset. Use cmf.cleaner(s) to help Let's look at a more complex one too. ```python -import cmf.report +import matchbox.report report.linkers(linker="splink") ``` @@ -1010,7 +1010,7 @@ from ``` ```python -import cmf +import matchbox cmf.query( select={ diff --git a/cmf/__init__.py b/src/matchbox/__init__.py similarity index 61% rename from cmf/__init__.py rename to src/matchbox/__init__.py index 41cbd15..6318f49 100644 --- a/cmf/__init__.py +++ b/src/matchbox/__init__.py @@ -2,11 +2,11 @@ from dotenv import find_dotenv, load_dotenv -from cmf.data.results import to_clusters -from cmf.dedupers.make_deduper import make_deduper -from cmf.helpers.cleaner import process -from cmf.helpers.selector import query -from cmf.linkers.make_linker import make_linker +from matchbox.data.results import to_clusters +from matchbox.dedupers.make_deduper import make_deduper +from matchbox.helpers.cleaner import process +from matchbox.helpers.selector import query +from matchbox.linkers.make_linker import make_linker __all__ = ("make_deduper", "make_linker", "to_clusters", "process", "query") diff --git a/cmf/admin.py b/src/matchbox/admin.py similarity index 96% rename from cmf/admin.py rename to src/matchbox/admin.py index 04a3c64..2827799 100644 --- a/cmf/admin.py +++ b/src/matchbox/admin.py @@ -8,9 +8,9 @@ from sqlalchemy.dialects.postgresql import insert from sqlalchemy.orm import Session -from cmf import locations as loc -from cmf.data import ENGINE, CMFBase, SourceData, SourceDataset -from cmf.data import utils as du +from matchbox import locations as loc +from matchbox.data import ENGINE, CMFBase, SourceData, SourceDataset +from matchbox.data import utils as du def init_db(base, engine: Engine = ENGINE): diff --git a/cmf/clean/.gitkeep b/src/matchbox/clean/.gitkeep similarity index 100% rename from cmf/clean/.gitkeep rename to src/matchbox/clean/.gitkeep diff --git a/cmf/clean/__init__.py b/src/matchbox/clean/__init__.py similarity index 82% rename from cmf/clean/__init__.py rename to src/matchbox/clean/__init__.py index 502f342..d0b4fdb 100644 --- a/cmf/clean/__init__.py +++ b/src/matchbox/clean/__init__.py @@ -1,4 +1,4 @@ -from cmf.clean.lib import ( +from matchbox.clean.lib import ( company_name, company_number, drop, @@ -8,7 +8,7 @@ postcode, postcode_to_area, ) -from cmf.clean.utils import alias, cleaning_function, unnest_renest +from matchbox.clean.utils import alias, cleaning_function, unnest_renest __all__ = ( # Cleaning functions diff --git a/cmf/clean/lib.py b/src/matchbox/clean/lib.py similarity index 98% rename from cmf/clean/lib.py rename to src/matchbox/clean/lib.py index 2c631a8..7c74324 100644 --- a/cmf/clean/lib.py +++ b/src/matchbox/clean/lib.py @@ -2,8 +2,8 @@ from pandas import DataFrame -from cmf.clean import steps -from cmf.clean import utils as cu +from matchbox.clean import steps +from matchbox.clean import utils as cu def company_name( diff --git a/cmf/clean/steps/__init__.py b/src/matchbox/clean/steps/__init__.py similarity index 94% rename from cmf/clean/steps/__init__.py rename to src/matchbox/clean/steps/__init__.py index 2f28975..cbf8084 100644 --- a/cmf/clean/steps/__init__.py +++ b/src/matchbox/clean/steps/__init__.py @@ -1,4 +1,4 @@ -from cmf.clean.steps.clean_basic import ( +from matchbox.clean.steps.clean_basic import ( array_except, array_intersect, clean_punctuation, @@ -23,7 +23,7 @@ to_upper, tokenise, ) -from cmf.clean.steps.clean_basic_original import ( +from matchbox.clean.steps.clean_basic_original import ( cms_original_clean_cdms_id, cms_original_clean_ch_id, cms_original_clean_company_name_ch, diff --git a/cmf/clean/steps/clean_basic.py b/src/matchbox/clean/steps/clean_basic.py similarity index 99% rename from cmf/clean/steps/clean_basic.py rename to src/matchbox/clean/steps/clean_basic.py index 477592b..3eba548 100644 --- a/cmf/clean/steps/clean_basic.py +++ b/src/matchbox/clean/steps/clean_basic.py @@ -1,6 +1,6 @@ from typing import Dict, List -from cmf.clean.utils import ABBREVIATIONS, STOPWORDS +from matchbox.clean.utils import ABBREVIATIONS, STOPWORDS def remove_whitespace(column: str) -> str: diff --git a/cmf/clean/steps/clean_basic_original.py b/src/matchbox/clean/steps/clean_basic_original.py similarity index 100% rename from cmf/clean/steps/clean_basic_original.py rename to src/matchbox/clean/steps/clean_basic_original.py diff --git a/cmf/clean/utils.py b/src/matchbox/clean/utils.py similarity index 100% rename from cmf/clean/utils.py rename to src/matchbox/clean/utils.py diff --git a/cmf/data/.gitkeep b/src/matchbox/data/.gitkeep similarity index 100% rename from cmf/data/.gitkeep rename to src/matchbox/data/.gitkeep diff --git a/cmf/data/__init__.py b/src/matchbox/data/__init__.py similarity index 50% rename from cmf/data/__init__.py rename to src/matchbox/data/__init__.py index bcae080..0caf0be 100644 --- a/cmf/data/__init__.py +++ b/src/matchbox/data/__init__.py @@ -1,10 +1,10 @@ -from cmf.data.clusters import Clusters, ClusterValidation, clusters_association -from cmf.data.data import SourceData, SourceDataset -from cmf.data.db import ENGINE, CMFBase -from cmf.data.dedupe import DDupeContains, DDupeProbabilities, Dedupes -from cmf.data.link import LinkContains, LinkProbabilities, Links, LinkValidation -from cmf.data.models import Models, ModelsFrom -from cmf.data.results import ClusterResults, ProbabilityResults +from matchbox.data.clusters import Clusters, ClusterValidation, clusters_association +from matchbox.data.data import SourceData, SourceDataset +from matchbox.data.db import ENGINE, CMFBase +from matchbox.data.dedupe import DDupeContains, DDupeProbabilities, Dedupes +from matchbox.data.link import LinkContains, LinkProbabilities, Links, LinkValidation +from matchbox.data.models import Models, ModelsFrom +from matchbox.data.results import ClusterResults, ProbabilityResults __all__ = ( # Clusters diff --git a/cmf/data/clusters.py b/src/matchbox/data/clusters.py similarity index 91% rename from cmf/data/clusters.py rename to src/matchbox/data/clusters.py index 069cdd9..8416673 100644 --- a/cmf/data/clusters.py +++ b/src/matchbox/data/clusters.py @@ -6,11 +6,11 @@ from sqlalchemy.dialects.postgresql import BYTEA from sqlalchemy.orm import Mapped, mapped_column, relationship -from cmf.data.db import CMFBase -from cmf.data.mixin import SHA1Mixin, UUIDMixin +from matchbox.data.db import CMFBase +from matchbox.data.mixin import SHA1Mixin, UUIDMixin if TYPE_CHECKING: - from cmf.data import Models + from matchbox.data import Models # ORM Many to Many pattern -- models/clusters association table diff --git a/cmf/data/data.py b/src/matchbox/data/data.py similarity index 93% rename from cmf/data/data.py rename to src/matchbox/data/data.py index 6d4de79..2630d90 100644 --- a/cmf/data/data.py +++ b/src/matchbox/data/data.py @@ -5,8 +5,8 @@ from sqlalchemy.dialects.postgresql import ARRAY from sqlalchemy.orm import Mapped, mapped_column, relationship -from cmf.data.db import CMFBase -from cmf.data.mixin import SHA1Mixin, UUIDMixin +from matchbox.data.db import CMFBase +from matchbox.data.mixin import SHA1Mixin, UUIDMixin class SourceDataset(UUIDMixin, CMFBase): diff --git a/cmf/data/db.py b/src/matchbox/data/db.py similarity index 100% rename from cmf/data/db.py rename to src/matchbox/data/db.py diff --git a/cmf/data/dedupe.py b/src/matchbox/data/dedupe.py similarity index 94% rename from cmf/data/dedupe.py rename to src/matchbox/data/dedupe.py index 29d4281..6fe7d28 100644 --- a/cmf/data/dedupe.py +++ b/src/matchbox/data/dedupe.py @@ -6,11 +6,11 @@ from sqlalchemy.dialects.postgresql import BYTEA from sqlalchemy.orm import Mapped, mapped_column, relationship -from cmf.data.db import CMFBase -from cmf.data.mixin import SHA1Mixin, UUIDMixin +from matchbox.data.db import CMFBase +from matchbox.data.mixin import SHA1Mixin, UUIDMixin if TYPE_CHECKING: - from cmf.data.models import Models + from matchbox.data.models import Models class Dedupes(SHA1Mixin, CMFBase): diff --git a/cmf/data/exceptions.py b/src/matchbox/data/exceptions.py similarity index 96% rename from cmf/data/exceptions.py rename to src/matchbox/data/exceptions.py index c3cffea..1030695 100644 --- a/cmf/data/exceptions.py +++ b/src/matchbox/data/exceptions.py @@ -1,6 +1,6 @@ from typing import Any, Optional -from cmf.data.db import CMFBase +from matchbox.data.models import CMFBase class CMFDBDataError(Exception): diff --git a/cmf/data/link.py b/src/matchbox/data/link.py similarity index 94% rename from cmf/data/link.py rename to src/matchbox/data/link.py index 48c6846..4d2264a 100644 --- a/cmf/data/link.py +++ b/src/matchbox/data/link.py @@ -6,11 +6,11 @@ from sqlalchemy.dialects.postgresql import BYTEA from sqlalchemy.orm import Mapped, mapped_column, relationship -from cmf.data.db import CMFBase -from cmf.data.mixin import SHA1Mixin, UUIDMixin +from matchbox.data.db import CMFBase +from matchbox.data.mixin import SHA1Mixin, UUIDMixin if TYPE_CHECKING: - from cmf.data.models import Models + from matchbox.data.models import Models class Links(SHA1Mixin, CMFBase): diff --git a/cmf/data/mixin.py b/src/matchbox/data/mixin.py similarity index 100% rename from cmf/data/mixin.py rename to src/matchbox/data/mixin.py diff --git a/cmf/data/models.py b/src/matchbox/data/models.py similarity index 93% rename from cmf/data/models.py rename to src/matchbox/data/models.py index d41240d..9235f26 100644 --- a/cmf/data/models.py +++ b/src/matchbox/data/models.py @@ -8,14 +8,14 @@ from sqlalchemy.orm import Mapped, WriteOnlyMapped, mapped_column, relationship from sqlalchemy.sql.selectable import Select -from cmf.data.clusters import clusters_association -from cmf.data.db import CMFBase -from cmf.data.dedupe import DDupeProbabilities -from cmf.data.link import LinkProbabilities -from cmf.data.mixin import SHA1Mixin +from matchbox.data.clusters import clusters_association +from matchbox.data.db import CMFBase +from matchbox.data.dedupe import DDupeProbabilities +from matchbox.data.link import LinkProbabilities +from matchbox.data.mixin import SHA1Mixin if TYPE_CHECKING: - from cmf.data import Clusters + from matchbox.data import Clusters class Models(SHA1Mixin, CMFBase): diff --git a/cmf/data/results.py b/src/matchbox/data/results.py similarity index 98% rename from cmf/data/results.py rename to src/matchbox/data/results.py index 3f1a25c..5ca1837 100644 --- a/cmf/data/results.py +++ b/src/matchbox/data/results.py @@ -17,14 +17,14 @@ from sqlalchemy.dialects.postgresql import insert from sqlalchemy.orm import Session -from cmf.data import utils as du -from cmf.data.clusters import Clusters, clusters_association -from cmf.data.data import SourceData -from cmf.data.db import ENGINE -from cmf.data.dedupe import DDupeContains, DDupeProbabilities, Dedupes -from cmf.data.exceptions import CMFDBDataError -from cmf.data.link import LinkContains, LinkProbabilities, Links -from cmf.data.models import Models, ModelsFrom +from matchbox.data import utils as du +from matchbox.data.clusters import Clusters, clusters_association +from matchbox.data.data import SourceData +from matchbox.data.db import ENGINE +from matchbox.data.dedupe import DDupeContains, DDupeProbabilities, Dedupes +from matchbox.data.exceptions import CMFDBDataError +from matchbox.data.link import LinkContains, LinkProbabilities, Links +from matchbox.data.models import Models, ModelsFrom logic_logger = logging.getLogger("cmf_logic") diff --git a/cmf/data/utils/__init__.py b/src/matchbox/data/utils/__init__.py similarity index 90% rename from cmf/data/utils/__init__.py rename to src/matchbox/data/utils/__init__.py index 3540f2a..10193ba 100644 --- a/cmf/data/utils/__init__.py +++ b/src/matchbox/data/utils/__init__.py @@ -1,4 +1,4 @@ -from cmf.data.utils.db import ( +from matchbox.data.utils.db import ( batched, data_to_batch, dataset_to_table, @@ -9,7 +9,7 @@ string_to_dataset, string_to_table, ) -from cmf.data.utils.sha1 import ( +from matchbox.data.utils.sha1 import ( columns_to_value_ordered_sha1, list_to_value_ordered_sha1, model_name_to_sha1, diff --git a/cmf/data/utils/db.py b/src/matchbox/data/utils/db.py similarity index 97% rename from cmf/data/utils/db.py rename to src/matchbox/data/utils/db.py index e5b2497..41b9457 100644 --- a/cmf/data/utils/db.py +++ b/src/matchbox/data/utils/db.py @@ -11,8 +11,8 @@ from sqlalchemy.exc import NoSuchTableError from sqlalchemy.orm import Session -from cmf.data import ENGINE, Models, ModelsFrom, SourceDataset -from cmf.data.exceptions import CMFSourceTableError +from matchbox.data import ENGINE, Models, ModelsFrom, SourceDataset +from matchbox.data.exceptions import CMFSourceTableError # Data conversion diff --git a/cmf/data/utils/sha1.py b/src/matchbox/data/utils/sha1.py similarity index 94% rename from cmf/data/utils/sha1.py rename to src/matchbox/data/utils/sha1.py index afb2d03..4eabd15 100644 --- a/cmf/data/utils/sha1.py +++ b/src/matchbox/data/utils/sha1.py @@ -6,10 +6,10 @@ from sqlalchemy import Engine, select from sqlalchemy.orm import Session -from cmf.data import ENGINE, SourceDataset -from cmf.data.exceptions import CMFDBDataError -from cmf.data.models import Models -from cmf.data.utils.db import get_schema_table_names +from matchbox.data import ENGINE, SourceDataset +from matchbox.data.exceptions import CMFDBDataError +from matchbox.data.models import Models +from matchbox.data.utils.db import get_schema_table_names T = TypeVar("T") diff --git a/cmf/datasets.toml b/src/matchbox/datasets.toml similarity index 100% rename from cmf/datasets.toml rename to src/matchbox/datasets.toml diff --git a/src/matchbox/dedupers/__init__.py b/src/matchbox/dedupers/__init__.py new file mode 100644 index 0000000..a480f67 --- /dev/null +++ b/src/matchbox/dedupers/__init__.py @@ -0,0 +1,3 @@ +from matchbox.dedupers.naive import NaiveDeduper + +__all__ = ("NaiveDeduper",) diff --git a/cmf/dedupers/make_deduper.py b/src/matchbox/dedupers/make_deduper.py similarity index 97% rename from cmf/dedupers/make_deduper.py rename to src/matchbox/dedupers/make_deduper.py index 4e1b39c..bdc2c40 100644 --- a/cmf/dedupers/make_deduper.py +++ b/src/matchbox/dedupers/make_deduper.py @@ -5,7 +5,7 @@ from pandas import DataFrame from pydantic import BaseModel, Field, ValidationInfo, field_validator -from cmf.data.results import ProbabilityResults +from matchbox.data.results import ProbabilityResults class DeduperSettings(BaseModel): diff --git a/cmf/dedupers/naive.py b/src/matchbox/dedupers/naive.py similarity index 97% rename from cmf/dedupers/naive.py rename to src/matchbox/dedupers/naive.py index 7c541db..ebaf906 100644 --- a/cmf/dedupers/naive.py +++ b/src/matchbox/dedupers/naive.py @@ -4,7 +4,7 @@ from pandas import ArrowDtype, DataFrame from pydantic import Field -from cmf.dedupers.make_deduper import Deduper, DeduperSettings +from matchbox.dedupers.make_deduper import Deduper, DeduperSettings class NaiveSettings(DeduperSettings): diff --git a/src/matchbox/helpers/__init__.py b/src/matchbox/helpers/__init__.py new file mode 100644 index 0000000..538e15d --- /dev/null +++ b/src/matchbox/helpers/__init__.py @@ -0,0 +1,20 @@ +from matchbox.helpers.cleaner import cleaner, cleaners +from matchbox.helpers.comparison import comparison +from matchbox.helpers.deletion import delete_model +from matchbox.helpers.selector import selector, selectors +from matchbox.helpers.visualisation import draw_model_tree + +__all__ = ( + # Cleaners + "cleaner", + "cleaners", + # Comparisons + "comparison", + # Selectors + "selector", + "selectors", + # Visualisation + "draw_model_tree", + # Deletion + "delete_model", +) diff --git a/cmf/helpers/cleaner.py b/src/matchbox/helpers/cleaner.py similarity index 100% rename from cmf/helpers/cleaner.py rename to src/matchbox/helpers/cleaner.py diff --git a/cmf/helpers/comparison.py b/src/matchbox/helpers/comparison.py similarity index 100% rename from cmf/helpers/comparison.py rename to src/matchbox/helpers/comparison.py diff --git a/cmf/helpers/deletion.py b/src/matchbox/helpers/deletion.py similarity index 94% rename from cmf/helpers/deletion.py rename to src/matchbox/helpers/deletion.py index bd8fe0f..54df757 100644 --- a/cmf/helpers/deletion.py +++ b/src/matchbox/helpers/deletion.py @@ -1,8 +1,8 @@ from sqlalchemy import Engine from sqlalchemy.orm import Session -from cmf.data import ENGINE, Models -from cmf.helpers.selector import get_all_parents +from matchbox.data import ENGINE, Models +from matchbox.helpers.selector import get_all_parents def delete_model(model: str, engine: Engine = ENGINE, certain: bool = False) -> None: diff --git a/cmf/helpers/selector.py b/src/matchbox/helpers/selector.py similarity index 99% rename from cmf/helpers/selector.py rename to src/matchbox/helpers/selector.py index 80f7719..a1f6ea0 100644 --- a/cmf/helpers/selector.py +++ b/src/matchbox/helpers/selector.py @@ -16,7 +16,7 @@ from sqlalchemy.orm import Session, aliased from sqlalchemy.sql.selectable import Select -from cmf.data import ( +from matchbox.data import ( ENGINE, Clusters, DDupeContains, @@ -25,7 +25,11 @@ SourceData, clusters_association, ) -from cmf.data.utils import get_schema_table_names, string_to_dataset, string_to_table +from matchbox.data.utils import ( + get_schema_table_names, + string_to_dataset, + string_to_table, +) def selector( diff --git a/cmf/helpers/visualisation.py b/src/matchbox/helpers/visualisation.py similarity index 92% rename from cmf/helpers/visualisation.py rename to src/matchbox/helpers/visualisation.py index ad40869..6d93aad 100644 --- a/cmf/helpers/visualisation.py +++ b/src/matchbox/helpers/visualisation.py @@ -3,8 +3,8 @@ from rustworkx.visualization import mpl_draw from sqlalchemy import Engine -from cmf.data import ENGINE -from cmf.data.utils import get_model_subgraph +from matchbox.data import ENGINE +from matchbox.data.utils import get_model_subgraph def draw_model_tree(engine: Engine = ENGINE) -> Figure: diff --git a/src/matchbox/linkers/__init__.py b/src/matchbox/linkers/__init__.py new file mode 100644 index 0000000..0d893a7 --- /dev/null +++ b/src/matchbox/linkers/__init__.py @@ -0,0 +1,5 @@ +from matchbox.linkers.deterministic import DeterministicLinker +from matchbox.linkers.splinklinker import SplinkLinker +from matchbox.linkers.weighteddeterministic import WeightedDeterministicLinker + +__all__ = ("DeterministicLinker", "WeightedDeterministicLinker", "SplinkLinker") diff --git a/cmf/linkers/deterministic.py b/src/matchbox/linkers/deterministic.py similarity index 96% rename from cmf/linkers/deterministic.py rename to src/matchbox/linkers/deterministic.py index f50a988..2b5ed60 100644 --- a/cmf/linkers/deterministic.py +++ b/src/matchbox/linkers/deterministic.py @@ -4,8 +4,8 @@ from pandas import ArrowDtype, DataFrame from pydantic import Field, field_validator -from cmf.helpers import comparison -from cmf.linkers.make_linker import Linker, LinkerSettings +from matchbox.helpers import comparison +from matchbox.linkers.make_linker import Linker, LinkerSettings class DeterministicSettings(LinkerSettings): diff --git a/cmf/linkers/make_linker.py b/src/matchbox/linkers/make_linker.py similarity index 97% rename from cmf/linkers/make_linker.py rename to src/matchbox/linkers/make_linker.py index 6336d87..599aea2 100644 --- a/cmf/linkers/make_linker.py +++ b/src/matchbox/linkers/make_linker.py @@ -5,7 +5,7 @@ from pandas import DataFrame from pydantic import BaseModel, Field, ValidationInfo, field_validator -from cmf.data.results import ProbabilityResults +from matchbox.data.results import ProbabilityResults class LinkerSettings(BaseModel): diff --git a/cmf/linkers/splinklinker.py b/src/matchbox/linkers/splinklinker.py similarity index 99% rename from cmf/linkers/splinklinker.py rename to src/matchbox/linkers/splinklinker.py index f5cfcbc..628688d 100644 --- a/cmf/linkers/splinklinker.py +++ b/src/matchbox/linkers/splinklinker.py @@ -8,7 +8,7 @@ from splink.duckdb.linker import DuckDBLinker from splink.linker import Linker as SplinkLibLinkerClass -from cmf.linkers.make_linker import Linker, LinkerSettings +from matchbox.linkers.make_linker import Linker, LinkerSettings logic_logger = logging.getLogger("cmf_logic") diff --git a/cmf/linkers/weighteddeterministic.py b/src/matchbox/linkers/weighteddeterministic.py similarity index 97% rename from cmf/linkers/weighteddeterministic.py rename to src/matchbox/linkers/weighteddeterministic.py index 75e611c..9257dcd 100644 --- a/cmf/linkers/weighteddeterministic.py +++ b/src/matchbox/linkers/weighteddeterministic.py @@ -4,8 +4,8 @@ from pandas import ArrowDtype, DataFrame from pydantic import BaseModel, Field, field_validator -from cmf.helpers import comparison -from cmf.linkers.make_linker import Linker, LinkerSettings +from matchbox.helpers import comparison +from matchbox.linkers.make_linker import Linker, LinkerSettings class WeightedComparison(BaseModel): diff --git a/cmf/locations.py b/src/matchbox/locations.py similarity index 100% rename from cmf/locations.py rename to src/matchbox/locations.py diff --git a/test/fixtures/data.py b/test/fixtures/data.py index 0ffd3d0..59ba5ad 100644 --- a/test/fixtures/data.py +++ b/test/fixtures/data.py @@ -7,28 +7,32 @@ import pandas as pd import pytest from dotenv import find_dotenv, load_dotenv +from matchbox import process, query +from matchbox.clean import company_name +from matchbox.helpers import cleaner, cleaners, selector from pandas import DataFrame from sqlalchemy.engine import Engine -import cmf.locations as loc -from cmf import process, query -from cmf.clean import company_name -from cmf.helpers import cleaner, cleaners, selector - dotenv_path = find_dotenv() load_dotenv(dotenv_path) LOGGER = logging.getLogger(__name__) +TEST_ROOT = Path(__file__).resolve().parents[1] + + +@pytest.fixture(scope="session") +def test_root_dir() -> Path: + return TEST_ROOT @pytest.fixture(scope="session") -def all_companies() -> DataFrame: +def all_companies(test_root_dir: Path) -> DataFrame: """ Raw, correct company data. Uses UUID as ID to replicate Data Workspace. 1,000 entries. """ df = pd.read_csv( - Path(loc.TEST, "data", "all_companies.csv"), encoding="utf-8" + Path(test_root_dir, "data", "all_companies.csv"), encoding="utf-8" ).reset_index(names="id") df["id"] = df["id"].apply(lambda x: uuid.UUID(int=x)) return df diff --git a/test/fixtures/db.py b/test/fixtures/db.py index 2e7e1cd..c3c2b4e 100644 --- a/test/fixtures/db.py +++ b/test/fixtures/db.py @@ -1,22 +1,14 @@ import hashlib import logging -import os import random from typing import Callable, Generator import pytest from _pytest.fixtures import FixtureRequest from dotenv import find_dotenv, load_dotenv -from pandas import DataFrame -from sqlalchemy import MetaData, create_engine, inspect, text -from sqlalchemy.dialects.postgresql import insert -from sqlalchemy.engine import Engine -from sqlalchemy.orm import Session -from sqlalchemy.schema import CreateSchema - -from cmf import make_deduper, make_linker, to_clusters -from cmf.admin import add_dataset -from cmf.data import ( +from matchbox import make_deduper, make_linker, to_clusters +from matchbox.admin import add_dataset +from matchbox.data import ( Clusters, CMFBase, DDupeContains, @@ -31,6 +23,12 @@ SourceDataset, clusters_association, ) +from pandas import DataFrame +from sqlalchemy import MetaData, create_engine, inspect, text +from sqlalchemy.dialects.postgresql import insert +from sqlalchemy.engine import Engine +from sqlalchemy.orm import Session +from sqlalchemy.schema import CreateSchema from .models import DedupeTestParams, LinkTestParams, ModelTestParams @@ -49,7 +47,7 @@ def db_clear_all() -> Callable[[Engine], None]: """ def _db_clear_all(db_engine: Engine) -> None: - db_metadata = MetaData(schema=os.getenv("SCHEMA")) + db_metadata = MetaData(schema="test") db_metadata.reflect(bind=db_engine) with Session(db_engine) as session: for table in reversed(db_metadata.sorted_tables): @@ -122,21 +120,21 @@ def _db_add_data(db_engine: Engine) -> None: crn_companies.to_sql( "crn", con=conn, - schema=os.getenv("SCHEMA"), + schema="test", if_exists="replace", index=False, ) duns_companies.to_sql( "duns", con=conn, - schema=os.getenv("SCHEMA"), + schema="test", if_exists="replace", index=False, ) cdms_companies.to_sql( "cdms", con=conn, - schema=os.getenv("SCHEMA"), + schema="test", if_exists="replace", index=False, ) @@ -145,17 +143,17 @@ def _db_add_data(db_engine: Engine) -> None: datasets = { "crn_table": { - "schema": os.getenv("SCHEMA"), + "schema": "test", "table": "crn", "id": "id", }, "duns_table": { - "schema": os.getenv("SCHEMA"), + "schema": "test", "table": "duns", "id": "id", }, "cdms_table": { - "schema": os.getenv("SCHEMA"), + "schema": "test", "table": "cdms", "id": "id", }, @@ -425,9 +423,14 @@ def db_engine( ) with engine.connect() as conn: + # Install relevant extensions + conn.execute(text('create extension if not exists "uuid-ossp";')) + conn.execute(text("create extension if not exists pgcrypto;")) + conn.commit() + # Create CMF schema - if not inspect(conn).has_schema(os.getenv("SCHEMA")): - conn.execute(CreateSchema(os.getenv("SCHEMA"))) + if not inspect(conn).has_schema("test"): + conn.execute(CreateSchema("test")) conn.commit() # Create CMF tables @@ -453,12 +456,9 @@ def cleanup(db_engine, request): def teardown(): with db_engine.connect() as conn: inspector = inspect(conn) - for table_name in inspector.get_table_names(schema=os.getenv("SCHEMA")): + for table_name in inspector.get_table_names(schema="test"): conn.execute( - text( - f'DROP TABLE IF EXISTS "{os.getenv("SCHEMA")}".' - f'"{table_name}" CASCADE;' - ) + text(f'DROP TABLE IF EXISTS "{"test"}".' f'"{table_name}" CASCADE;') ) conn.commit() diff --git a/test/fixtures/models.py b/test/fixtures/models.py index 2ce9a16..1be989f 100644 --- a/test/fixtures/models.py +++ b/test/fixtures/models.py @@ -2,15 +2,18 @@ from typing import Any, Callable, Dict, Type, Union import splink.duckdb.comparison_library as cl +from matchbox.dedupers import NaiveDeduper +from matchbox.dedupers.make_deduper import Deduper +from matchbox.linkers import ( + DeterministicLinker, + SplinkLinker, + WeightedDeterministicLinker, +) +from matchbox.linkers.make_linker import Linker from pydantic import BaseModel, Field from splink.duckdb import blocking_rule_library as brl from splink.duckdb.linker import DuckDBLinker -from cmf.dedupers import NaiveDeduper -from cmf.dedupers.make_deduper import Deduper -from cmf.linkers import DeterministicLinker, SplinkLinker, WeightedDeterministicLinker -from cmf.linkers.make_linker import Linker - class DedupeTestParams(BaseModel): """Data class for raw dataset testing parameters and attributes.""" diff --git a/test/test_cleaning.py b/test/test_cleaning.py index f00df42..85a7d60 100644 --- a/test/test_cleaning.py +++ b/test/test_cleaning.py @@ -1,22 +1,21 @@ import ast from functools import partial from pathlib import Path +from typing import Callable import duckdb import pandas as pd import pyarrow as pa import pytest - -from cmf import locations as loc -from cmf.clean import drop -from cmf.clean.steps import ( +from matchbox.clean import drop +from matchbox.clean.steps import ( clean_punctuation, expand_abbreviations, list_join_to_string, remove_stopwords, tokenise, ) -from cmf.clean.utils import alias, cleaning_function, unnest_renest +from matchbox.clean.utils import alias, cleaning_function, unnest_renest """ ---------------------------- @@ -46,7 +45,7 @@ """ -def load_test_data(path): +def load_test_data(path: Path) -> tuple[pd.DataFrame, pd.DataFrame]: dirty = pd.read_csv(Path(path, "dirty.csv"), converters={"list": ast.literal_eval}) clean = pd.read_csv(Path(path, "clean.csv"), converters={"list": ast.literal_eval}) @@ -64,7 +63,7 @@ def load_test_data(path): return dirty, clean -def passthrough(input_column): +def passthrough(input_column: str) -> str: """ A passthrough cleaning function that does nothing. Helps test more complex building functions. @@ -87,7 +86,7 @@ def passthrough(input_column): @pytest.mark.parametrize("test", cleaning_tests) -def test_basic_functions(test): +def test_basic_functions(test: tuple[str, Callable], test_root_dir: Path): """ Tests whether the basic cleaning functions do what they're supposed to. More complex functions should follow from here. @@ -95,7 +94,7 @@ def test_basic_functions(test): test_name = test[0] test_cleaning_function = test[1] - dirty, clean = load_test_data(Path(loc.PROJECT_DIR, "test", "cleaning", test_name)) + dirty, clean = load_test_data(Path(test_root_dir, "cleaning", test_name)) cleaned = ( duckdb.sql( @@ -130,7 +129,7 @@ def test_basic_functions(test): @pytest.mark.parametrize("test", function_tests) -def test_function(test): +def test_function(test: tuple[str, Callable], test_root_dir: Path): """ Tests whether the cleaning function is accurately combining basic functions. @@ -139,7 +138,7 @@ def test_function(test): test_cleaning_function = cleaning_function(*test[1]) dirty, clean = load_test_data( - Path(loc.PROJECT_DIR, "test", "cleaning", "cleaning_function", test_name) + Path(test_root_dir, "cleaning", "cleaning_function", test_name) ) cleaned = test_cleaning_function(dirty, column="col") @@ -154,7 +153,7 @@ def test_function(test): @pytest.mark.parametrize("test", nest_unnest_tests) -def test_nest_unnest(test): +def test_nest_unnest(test: tuple[str, Callable], test_root_dir: Path): """ Tests whether the nest_unnest function is working. """ @@ -162,7 +161,7 @@ def test_nest_unnest(test): test_cleaning_function = cleaning_function(test[1]) dirty, clean = load_test_data( - Path(loc.PROJECT_DIR, "test", "cleaning", "unnest_renest", test_name) + Path(test_root_dir, "cleaning", "unnest_renest", test_name) ) test_cleaning_function_arrayed = unnest_renest(test_cleaning_function) @@ -179,13 +178,13 @@ def test_nest_unnest(test): assert cleaned.equals(clean) -def test_alias(): +def test_alias(test_root_dir: Path): """ Tests whether the alias function is working. """ test_cleaning_function = cleaning_function(passthrough) - dirty, clean = load_test_data(Path(loc.PROJECT_DIR, "test", "cleaning", "alias")) + dirty, clean = load_test_data(Path(test_root_dir, "cleaning", "alias")) alias_function = alias(test_cleaning_function, "foo") @@ -194,11 +193,11 @@ def test_alias(): assert "foo" in cleaned.columns -def test_drop(): +def test_drop(test_root_dir: Path): """ Tests whether the drop function is working. """ - dirty, clean = load_test_data(Path(loc.PROJECT_DIR, "test", "cleaning", "alias")) + dirty, clean = load_test_data(Path(test_root_dir, "cleaning", "alias")) cleaned = drop(dirty, column="col") diff --git a/test/test_db.py b/test/test_db.py index 08d420b..49b40b3 100644 --- a/test/test_db.py +++ b/test/test_db.py @@ -1,13 +1,9 @@ import itertools import logging -import os from dotenv import find_dotenv, load_dotenv -from sqlalchemy import MetaData, Table, delete, insert, inspect, text -from sqlalchemy.orm import Session - -from cmf.admin import add_dataset -from cmf.data import ( +from matchbox.admin import add_dataset +from matchbox.data import ( Clusters, DDupeProbabilities, Dedupes, @@ -18,6 +14,8 @@ SourceDataset, clusters_association, ) +from sqlalchemy import MetaData, Table, delete, insert, inspect, text +from sqlalchemy.orm import Session from .fixtures.models import ( dedupe_data_test_params, @@ -36,7 +34,7 @@ def test_database(db_engine): """ Test the database contains all the tables we expect. """ - tables = set(inspect(db_engine).get_table_names(schema=os.getenv("SCHEMA"))) + tables = set(inspect(db_engine).get_table_names(schema="test")) to_check = { "crn", "duns", @@ -114,11 +112,11 @@ def test_insert_data(db_engine, crn_companies, duns_companies, cdms_companies): ] with Session(db_engine) as session: # Reflect the table and insert the data - db_metadata = MetaData(schema=os.getenv("SCHEMA")) + db_metadata = MetaData(schema="test") crn_table = Table( "crn", db_metadata, - schema=os.getenv("SCHEMA"), + schema="test", autoload_with=session.get_bind(), ) session.execute(insert(crn_table), new_data) @@ -127,7 +125,7 @@ def test_insert_data(db_engine, crn_companies, duns_companies, cdms_companies): # Add the dataset again add_dataset( { - "schema": os.getenv("SCHEMA"), + "schema": "test", "table": "crn", "id": "id", }, diff --git a/test/test_dedupers.py b/test/test_dedupers.py index 83401fc..d8e512b 100644 --- a/test/test_dedupers.py +++ b/test/test_dedupers.py @@ -1,10 +1,9 @@ import pytest +from matchbox import make_deduper, to_clusters +from matchbox.data import Models from pandas import DataFrame from sqlalchemy.orm import Session -from cmf import make_deduper, to_clusters -from cmf.data import Models - from .fixtures.models import dedupe_data_test_params, dedupe_model_test_params diff --git a/test/test_helpers.py b/test/test_helpers.py index 70e855f..61487ac 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -2,13 +2,9 @@ import os from dotenv import find_dotenv, load_dotenv -from matplotlib.figure import Figure -from pandas import DataFrame -from sqlalchemy.orm import Session - -from cmf import process, query -from cmf.clean import company_name, company_number -from cmf.data import ( +from matchbox import process, query +from matchbox.clean import company_name, company_number +from matchbox.data import ( Clusters, DDupeProbabilities, Dedupes, @@ -17,7 +13,7 @@ Models, clusters_association, ) -from cmf.helpers import ( +from matchbox.helpers import ( cleaner, cleaners, comparison, @@ -26,6 +22,9 @@ selector, selectors, ) +from matplotlib.figure import Figure +from pandas import DataFrame +from sqlalchemy.orm import Session from .fixtures.models import ( dedupe_data_test_params, diff --git a/test/test_linkers.py b/test/test_linkers.py index 6df0861..1afff01 100644 --- a/test/test_linkers.py +++ b/test/test_linkers.py @@ -1,10 +1,9 @@ import pytest +from matchbox import make_linker, to_clusters +from matchbox.data import Models from pandas import DataFrame from sqlalchemy.orm import Session -from cmf import make_linker, to_clusters -from cmf.data import Models - from .fixtures.models import ( dedupe_data_test_params, dedupe_model_test_params, diff --git a/test/test_utils.py b/test/test_utils.py index c992870..f1805d9 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,7 +1,6 @@ +from matchbox.data import utils as du from pandas import Series, concat -from cmf.data import utils as du - def test_sha1_conversion(all_companies): """Tests SHA1 conversion works as expected.""" diff --git a/uv.lock b/uv.lock index 98fb7a0..f721c37 100644 --- a/uv.lock +++ b/uv.lock @@ -196,67 +196,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/75/49e5bfe642f71f272236b5b2d2691cf915a7283cc0ceda56357b61daa538/comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3", size = 7180 }, ] -[[package]] -name = "company-matching-framework" -version = "0.1.0" -source = { virtual = "." } -dependencies = [ - { name = "altair" }, - { name = "click" }, - { name = "duckdb" }, - { name = "matplotlib" }, - { name = "pandas" }, - { name = "pg-bulk-ingest" }, - { name = "psycopg2-binary" }, - { name = "pyarrow" }, - { name = "pydantic" }, - { name = "python-dotenv" }, - { name = "rustworkx" }, - { name = "splink" }, - { name = "sqlalchemy" }, - { name = "tomli" }, -] - -[package.dev-dependencies] -dev = [ - { name = "docker" }, - { name = "ipykernel" }, - { name = "pre-commit" }, - { name = "pytest" }, - { name = "pytest-cov" }, - { name = "pytest-env" }, - { name = "ruff" }, -] - -[package.metadata] -requires-dist = [ - { name = "altair", specifier = ">=5.4.1" }, - { name = "click", specifier = ">=8.1.7" }, - { name = "duckdb", specifier = ">=1.1.1" }, - { name = "matplotlib", specifier = ">=3.9.2" }, - { name = "pandas", specifier = ">=2.2.3" }, - { name = "pg-bulk-ingest", specifier = ">=0.0.54" }, - { name = "psycopg2-binary", specifier = ">=2.9.9" }, - { name = "pyarrow", specifier = ">=17.0.0" }, - { name = "pydantic", specifier = ">=2.9.2" }, - { name = "python-dotenv", specifier = ">=1.0.1" }, - { name = "rustworkx", specifier = ">=0.15.1" }, - { name = "splink", specifier = "<4" }, - { name = "sqlalchemy", specifier = ">=2.0.35" }, - { name = "tomli", specifier = ">=2.0.1" }, -] - -[package.metadata.requires-dev] -dev = [ - { name = "docker", specifier = ">=7.1.0" }, - { name = "ipykernel", specifier = ">=6.29.5" }, - { name = "pre-commit", specifier = ">=3.8.0" }, - { name = "pytest", specifier = ">=8.3.3" }, - { name = "pytest-cov", specifier = ">=5.0.0" }, - { name = "pytest-env", specifier = ">=1.1.5" }, - { name = "ruff", specifier = ">=0.6.8" }, -] - [[package]] name = "contourpy" version = "1.3.0" @@ -798,6 +737,67 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/14/c3554d512d5f9100a95e737502f4a2323a1959f6d0d01e0d0997b35f7b10/MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb", size = 17127 }, ] +[[package]] +name = "matchbox" +version = "0.1.0" +source = { editable = "." } +dependencies = [ + { name = "altair" }, + { name = "click" }, + { name = "duckdb" }, + { name = "matplotlib" }, + { name = "pandas" }, + { name = "pg-bulk-ingest" }, + { name = "psycopg2-binary" }, + { name = "pyarrow" }, + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "rustworkx" }, + { name = "splink" }, + { name = "sqlalchemy" }, + { name = "tomli" }, +] + +[package.dev-dependencies] +dev = [ + { name = "docker" }, + { name = "ipykernel" }, + { name = "pre-commit" }, + { name = "pytest" }, + { name = "pytest-cov" }, + { name = "pytest-env" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "altair", specifier = ">=5.4.1" }, + { name = "click", specifier = ">=8.1.7" }, + { name = "duckdb", specifier = ">=1.1.1" }, + { name = "matplotlib", specifier = ">=3.9.2" }, + { name = "pandas", specifier = ">=2.2.3" }, + { name = "pg-bulk-ingest", specifier = ">=0.0.54" }, + { name = "psycopg2-binary", specifier = ">=2.9.9" }, + { name = "pyarrow", specifier = ">=17.0.0" }, + { name = "pydantic", specifier = ">=2.9.2" }, + { name = "python-dotenv", specifier = ">=1.0.1" }, + { name = "rustworkx", specifier = ">=0.15.1" }, + { name = "splink", specifier = "<4" }, + { name = "sqlalchemy", specifier = ">=2.0.35" }, + { name = "tomli", specifier = ">=2.0.1" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "docker", specifier = ">=7.1.0" }, + { name = "ipykernel", specifier = ">=6.29.5" }, + { name = "pre-commit", specifier = ">=3.8.0" }, + { name = "pytest", specifier = ">=8.3.3" }, + { name = "pytest-cov", specifier = ">=5.0.0" }, + { name = "pytest-env", specifier = ">=1.1.5" }, + { name = "ruff", specifier = ">=0.6.8" }, +] + [[package]] name = "matplotlib" version = "3.9.2"