From c40617d9facb20b820c9e1c460ca5dedf6b62244 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20Br=C3=BCckner?= Date: Tue, 17 Sep 2024 13:28:37 +0200 Subject: [PATCH] Replace custom script with Paperless-ngx CLI --- docs/post-consumption/content-matching.md | 23 ++--- .../10-install-additional-packages.sh | 4 +- .../content-matching/organize.config.yml.tpl | 2 +- .../content-matching/pngx-update-document.py | 85 ------------------- 4 files changed, 13 insertions(+), 101 deletions(-) delete mode 100755 scripts/post-consumption/content-matching/pngx-update-document.py diff --git a/docs/post-consumption/content-matching.md b/docs/post-consumption/content-matching.md index e04cf55..25f9a45 100644 --- a/docs/post-consumption/content-matching.md +++ b/docs/post-consumption/content-matching.md @@ -8,13 +8,12 @@ Paperless-ngx does a great job matching documents with correct correspondents, s However, there are documents for which the automatic matching doesn't work or a single regular expression match isn't sufficient. For such cases, further examining the document's content after consumption is necessary. -## Update document details via organize +## Update document details via organize and the Paperless-ngx CLI [organize](https://github.com/tfeldmann/organize) is an open-source, command-line file management automation tool. It allows to execute certain actions based on custom filters. These can be easily defined in YAML. -Probably the most helpful filter in this context is the `filecontent` filter. The document's content can be matched with regular expressions -which allows to dynamically re-use (parts of) the matched content in subsequent actions. +Probably the most helpful filter in this context is the `filecontent` filter. The document's content can be matched with regular expressions which allows to dynamically re-use (parts of) the matched content in subsequent actions. 
Following script @@ -22,12 +21,15 @@ Following script This helps to stick to a consistent naming pattern for documents that you receive regularly, e.g. invoices. 2. extracts a value out of the document content and stores it in a given custom field +The Paperless-ngx CLI can be used to update other fields as well. Check the CLI's help or [GitHub repository](https://github.com/marcelbrueckner/paperless-ngx-cli) for more information. + ### Prerequisites For this solution to work, you will need to install the following packages: * [organize-tool](https://pypi.org/project/organize-tool/) * [poppler](https://poppler.freedesktop.org/)[^1] +* [pypaperless-cli](https://pypi.org/project/pypaperless-cli/) [^1]: Poppler is required for organize's `filecontent` filter to work, see [https://github.com/tfeldmann/organize/issues/322](https://github.com/tfeldmann/organize/issues/322). @@ -41,8 +43,7 @@ Sticking to the general idea of our scripts folder layout, we will end up with f paperless-ngx/ ├─ my-post-consumption-scripts/ │ ├─ organize/ -│ │ ├─ organize.config.yml.tpl -│ │ └─ pngx-update-document.py +│ │ └─ organize.config.yml.tpl │ └─ post-consumption-wrapper.sh │ # Obviously the below file only exists │ # if you're running Paperless-ngx via Docker Compose @@ -57,9 +58,10 @@ paperless-ngx/ ```bash # Token to access the REST API - PAPERLESS_TOKEN= + PNGX_TOKEN= # Your Paperless-ngx URL, without trailing slash - PAPERLESS_URL= + # If running your post-consumption script within Docker, it's likely to be http://localhost:8000 + PNGX_HOST= ``` === "organize.config.yml.tpl" @@ -68,12 +70,6 @@ paperless-ngx/ --8<-- "scripts/post-consumption/content-matching/organize.config.yml.tpl" ``` -=== "pngx-update-document.py" - - ```python - --8<-- "scripts/post-consumption/content-matching/pngx-update-document.py" - ``` - === "post-consumption-wrapper.sh" ```bash @@ -89,4 +85,3 @@ paperless-ngx/ ## Notes Script files can also be found on 
[GitHub](https://github.com/marcelbrueckner/paperless.sh/tree/main/scripts/post-consumption/content-matching). - diff --git a/scripts/post-consumption/content-matching/10-install-additional-packages.sh b/scripts/post-consumption/content-matching/10-install-additional-packages.sh index a32e7cd..908869c 100755 --- a/scripts/post-consumption/content-matching/10-install-additional-packages.sh +++ b/scripts/post-consumption/content-matching/10-install-additional-packages.sh @@ -5,5 +5,7 @@ # Add additional information to consumed documents # based on hypercomplex ;) rules # https://github.com/tfeldmann/organize/ +# https://github.com/marcelbrueckner/paperless-ngx-cli apt-get install poppler-utils -pip install organize-tool +pip install --root-user-action=ignore organize-tool +pip install --root-user-action=ignore pypaperless-cli diff --git a/scripts/post-consumption/content-matching/organize.config.yml.tpl b/scripts/post-consumption/content-matching/organize.config.yml.tpl index c3a0e30..5c816b5 100755 --- a/scripts/post-consumption/content-matching/organize.config.yml.tpl +++ b/scripts/post-consumption/content-matching/organize.config.yml.tpl @@ -20,5 +20,5 @@ rules: - filecontent: 'Amount due.*(?P<amount>\d{2}\.\d{2})' actions: - echo: "Home Assistant hooray" - - shell: "./pngx-update-document.py --url http://localhost:8000 --document-id {env.DOCUMENT_ID} --title '{filecontent.title}' --custom-field-id 1 --custom-field-value {filecontent.amount}" + - shell: "pngx edit {env.DOCUMENT_ID} --title '{filecontent.title}' --custom-fields 1={filecontent.amount}" - echo: "{shell.output}" diff --git a/scripts/post-consumption/content-matching/pngx-update-document.py b/scripts/post-consumption/content-matching/pngx-update-document.py deleted file mode 100755 index 7bd9fcd..0000000 --- a/scripts/post-consumption/content-matching/pngx-update-document.py +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env python - -# Work in progress -# Only allows updating the title and a single custom field 
at the moment - -import argparse, httpx, os, sys - -parser = argparse.ArgumentParser(description='Update a single document via Paperless-ngx REST API') -parser.add_argument('--url', - dest='url', - action='store', - help='Your Paperless-ngx URL', - default=os.environ.get('PAPERLESS_URL') -) -parser.add_argument('--auth-token', - dest='token', - action='store', - help='Your Paperless-ngx REST API authentication token', - default=os.environ.get('PAPERLESS_TOKEN') -) -parser.add_argument('--document-id', - dest='id', - type=int, - action='store', - help='ID of the document that should be updated', - required=True -) -parser.add_argument('--title', - dest='title', - action='store', - help='Set the document title' -) -parser.add_argument('--custom-field-id', - dest='custom_field_id', - type=int, - action='store', - help='ID of the custom field that should be updated' -) -parser.add_argument('--custom-field-value', - dest='custom_field_value', - action='store', - help='Value of the custom field that should be stored' -) -args = parser.parse_args() - -headers = {'Authorization': f'Token {args.token}'} -data = {} - -# Update title -if args.title is not None: - data['title'] = args.title - -# Update custom field -# Only if both --custom-field-id and --custom-field-value have been specified -if all(param is not None for param in [args.custom_field_id, args.custom_field_value]): - new_field = { - "field": args.custom_field_id, - "value": args.custom_field_value - } - - # Even when patching a single custom field, we need to include all of the document's existing custom fields - # Otherwise, other custom fields will be removed from the document - response = httpx.get(f"{args.url}/api/documents/{args.id}/", headers=headers) - - if response.is_error: - msg = "HTTP error {} while trying to obtain document details via REST API at {}." 
- sys.exit(msg.format(response.status_code, args.url)) - - data['custom_fields'] = response.json()['custom_fields'] - - # Update custom field value "in-place" if already attached to document (to keep custom field order) - if any(custom_field['field'] == args.custom_field_id for custom_field in data['custom_fields']): - data['custom_fields'] = [(new_field if custom_field['field'] == args.custom_field_id else custom_field) for custom_field in data['custom_fields']] - # Otherwise, simply append to the list - else: - data['custom_fields'] = data['custom_fields'].append(new_field) - -if data: - response = httpx.patch(f"{args.url}/api/documents/{args.id}/", headers=headers, json=data) - - if response.is_error: - msg = "HTTP error {} while trying to update document via REST API at {}." - sys.exit(msg.format(response.status_code, args.url, data)) - - print(f"Document with ID {args.id} successfully updated")