From c40617d9facb20b820c9e1c460ca5dedf6b62244 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcel=20Br=C3=BCckner?=
 <marcelbrueckner@users.noreply.github.com>
Date: Tue, 17 Sep 2024 13:28:37 +0200
Subject: [PATCH] Replace custom script with Paperless-ngx CLI

---
 docs/post-consumption/content-matching.md     | 23 ++---
 .../10-install-additional-packages.sh         |  4 +-
 .../content-matching/organize.config.yml.tpl  |  2 +-
 .../content-matching/pngx-update-document.py  | 85 -------------------
 4 files changed, 13 insertions(+), 101 deletions(-)
 delete mode 100755 scripts/post-consumption/content-matching/pngx-update-document.py

diff --git a/docs/post-consumption/content-matching.md b/docs/post-consumption/content-matching.md
index e04cf55..25f9a45 100644
--- a/docs/post-consumption/content-matching.md
+++ b/docs/post-consumption/content-matching.md
@@ -8,13 +8,12 @@ Paperless-ngx does a great job matching documents with correct correspondents, s
 However, there are documents for which the automatic matching doesn't work or a single regular expression match isn't sufficient.
 For such cases, further examining the document's content after consumption is necessary.
 
-## Update document details via organize
+## Update document details via organize and the Paperless-ngx CLI
 
 [organize](https://github.com/tfeldmann/organize) is an open-source, command-line file management automation tool.
 It allows to execute certain actions based on custom filters. These can be easily defined in YAML.
 
-Probably the most helpful filter in this context is the `filecontent` filter. The document's content can be matched with regular expressions
-which allows to dynamically re-use (parts of) the matched content in subsequent actions.
+Probably the most helpful filter in this context is the `filecontent` filter. The document's content can be matched with regular expressions, which allows you to dynamically re-use (parts of) the matched content in subsequent actions.
 
 Following script
 
@@ -22,12 +21,15 @@ Following script
     This helps to stick to a consistent naming pattern for documents that you receive regularly, e.g. invoices.
 2. extracts a value out of the document content and stores it in a given custom field
 
+The Paperless-ngx CLI can be used to update other fields as well. Check the CLI's help or [GitHub repository](https://github.com/marcelbrueckner/paperless-ngx-cli) for more information.
+
 ### Prerequisites
 
 For this solution to work, you will need to install the following packages:
 
 * [organize-tool](https://pypi.org/project/organize-tool/)
 * [poppler](https://poppler.freedesktop.org/)[^1]
+* [pypaperless-cli](https://pypi.org/project/pypaperless-cli/)
 
 [^1]: Poppler is required for organize's `filecontent` filter to work, see [https://github.com/tfeldmann/organize/issues/322](https://github.com/tfeldmann/organize/issues/322).
 
@@ -41,8 +43,7 @@ Sticking to the general idea of our scripts folder layout, we will end up with f
 paperless-ngx/
 ├─ my-post-consumption-scripts/
 │  ├─ organize/
-│  │  ├─ organize.config.yml.tpl
-│  │  └─ pngx-update-document.py
+│  │  └─ organize.config.yml.tpl
 │  └─ post-consumption-wrapper.sh
 │  # Obviously the below file only exists
 │  # if you're running Paperless-ngx via Docker Compose
@@ -57,9 +58,10 @@ paperless-ngx/
 
     ```bash
     # Token to access the REST API
-    PAPERLESS_TOKEN=
+    PNGX_TOKEN=
     # Your Paperless-ngx URL, without trailing slash
-    PAPERLESS_URL=
+    # If running your post-consumption script within Docker, it's likely to be http://localhost:8000
+    PNGX_HOST=
     ```
 
 === "organize.config.yml.tpl"
@@ -68,12 +70,6 @@ paperless-ngx/
     --8<-- "scripts/post-consumption/content-matching/organize.config.yml.tpl"
     ```
 
-=== "pngx-update-document.py"
-
-    ```python
-    --8<-- "scripts/post-consumption/content-matching/pngx-update-document.py"
-    ```
-
 === "post-consumption-wrapper.sh"
 
     ```bash
@@ -89,4 +85,3 @@ paperless-ngx/
 ## Notes
 
 Script files can also be found on [GitHub](https://github.com/marcelbrueckner/paperless.sh/tree/main/scripts/post-consumption/content-matching).
-
diff --git a/scripts/post-consumption/content-matching/10-install-additional-packages.sh b/scripts/post-consumption/content-matching/10-install-additional-packages.sh
index a32e7cd..908869c 100755
--- a/scripts/post-consumption/content-matching/10-install-additional-packages.sh
+++ b/scripts/post-consumption/content-matching/10-install-additional-packages.sh
@@ -5,5 +5,7 @@
 # Add additional information to consumed documents
 # based on hypercomplex ;) rules
 # https://github.com/tfeldmann/organize/
+# https://github.com/marcelbrueckner/paperless-ngx-cli
 apt-get install poppler-utils
-pip install organize-tool
+pip install --root-user-action=ignore organize-tool
+pip install --root-user-action=ignore pypaperless-cli
diff --git a/scripts/post-consumption/content-matching/organize.config.yml.tpl b/scripts/post-consumption/content-matching/organize.config.yml.tpl
index c3a0e30..5c816b5 100755
--- a/scripts/post-consumption/content-matching/organize.config.yml.tpl
+++ b/scripts/post-consumption/content-matching/organize.config.yml.tpl
@@ -20,5 +20,5 @@ rules:
       - filecontent: 'Amount due.*(?P<amount>\d{2}\.\d{2})'
     actions:
       - echo: "Home Assistant hooray"
-      - shell: "./pngx-update-document.py --url http://localhost:8000 --document-id {env.DOCUMENT_ID} --title '{filecontent.title}' --custom-field-id 1 --custom-field-value {filecontent.amount}"
+      - shell: "pngx edit {env.DOCUMENT_ID} --title '{filecontent.title}' --custom-fields 1={filecontent.amount}"
       - echo: "{shell.output}"
diff --git a/scripts/post-consumption/content-matching/pngx-update-document.py b/scripts/post-consumption/content-matching/pngx-update-document.py
deleted file mode 100755
index 7bd9fcd..0000000
--- a/scripts/post-consumption/content-matching/pngx-update-document.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/env python
-
-# Work in progress
-# Only allows updating the title and a single custom field at the moment
-
-import argparse, httpx, os, sys
-
-parser = argparse.ArgumentParser(description='Update a single document via Paperless-ngx REST API')
-parser.add_argument('--url',
-    dest='url',
-    action='store',
-    help='Your Paperless-ngx URL',
-    default=os.environ.get('PAPERLESS_URL')
-)
-parser.add_argument('--auth-token',
-    dest='token',
-    action='store',
-    help='Your Paperless-ngx REST API authentication token',
-    default=os.environ.get('PAPERLESS_TOKEN')
-)
-parser.add_argument('--document-id',
-    dest='id',
-    type=int,
-    action='store',
-    help='ID of the document that should be updated',
-    required=True
-)
-parser.add_argument('--title',
-    dest='title',
-    action='store',
-    help='Set the document title'
-)
-parser.add_argument('--custom-field-id',
-    dest='custom_field_id',
-    type=int,
-    action='store',
-    help='ID of the custom field that should be updated'
-)
-parser.add_argument('--custom-field-value',
-    dest='custom_field_value',
-    action='store',
-    help='Value of the custom field that should be stored'
-)
-args = parser.parse_args()
-
-headers = {'Authorization': f'Token {args.token}'}
-data = {}
-
-# Update title
-if args.title is not None:
-    data['title'] = args.title
-
-# Update custom field
-# Only if both --custom-field-id and --custom-field-value have been specified
-if all(param is not None for param in [args.custom_field_id, args.custom_field_value]):
-    new_field = {
-        "field": args.custom_field_id,
-        "value": args.custom_field_value
-    }
-
-    # Even when patching a single custom field, we need to include all of the document's existing custom fields
-    # Otherwise, other custom fields will be removed from the document
-    response = httpx.get(f"{args.url}/api/documents/{args.id}/", headers=headers)
-
-    if response.is_error:
-        msg = "HTTP error {} while trying to obtain document details via REST API at {}."
-        sys.exit(msg.format(response.status_code, args.url))
-    
-    data['custom_fields'] = response.json()['custom_fields']
-
-    # Update custom field value "in-place" if already attached to document (to keep custom field order)
-    if any(custom_field['field'] == args.custom_field_id for custom_field in data['custom_fields']):
-        data['custom_fields'] = [(new_field if custom_field['field'] == args.custom_field_id else custom_field) for custom_field in data['custom_fields']]
-    # Otherwise, simply append to the list
-    else:
-        data['custom_fields'] = data['custom_fields'].append(new_field)
-
-if data:
-    response = httpx.patch(f"{args.url}/api/documents/{args.id}/", headers=headers, json=data)
-
-    if response.is_error:
-        msg = "HTTP error {} while trying to update document via REST API at {}."
-        sys.exit(msg.format(response.status_code, args.url, data))
-
-    print(f"Document with ID {args.id} successfully updated")