Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow to provide HTTP headers #239

Merged
merged 2 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/)

- Added publish.yml to automatically publish new releases to PyPI [#236](https://github.com/stac-utils/stac-validator/pull/236)
- Configure whether to open URLs when validating assets [#238](https://github.com/stac-utils/stac-validator/pull/238)
- Allow providing HTTP headers [#239](https://github.com/stac-utils/stac-validator/pull/239)

## [v3.4.0] - 2024-10-08

Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ Options:
with --pages. Defaults to one page.
--no-assets-urls Disables the opening of href links when validating
assets (enabled by default).
--header KEY VALUE HTTP header to include in the requests. Can be used
multiple times.
-p, --pages INTEGER Maximum number of pages to validate via --item-
collection. Defaults to one page.
-v, --verbose Enables verbose output for recursive mode.
Expand Down Expand Up @@ -332,3 +334,9 @@ stac-validator https://spot-canada-ortho.s3.amazonaws.com/catalog.json --recursi
```bash
stac-validator https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items --item-collection --pages 2
```

**--header**

```bash
stac-validator https://stac-catalog.eu/collections/sentinel-s2-l2a/items --header x-api-key $MY_API_KEY --header foo bar
```
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ black
pytest
pytest-mypy
pre-commit
requests-mock
types-jsonschema
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
extras_require={
"dev": [
"pytest",
"requests-mock",
"types-setuptools",
],
},
Expand All @@ -41,5 +42,5 @@
"console_scripts": ["stac-validator = stac_validator.stac_validator:main"]
},
python_requires=">=3.8",
tests_require=["pytest"],
tests_require=["pytest", "requests-mock"],
)
9 changes: 9 additions & 0 deletions stac_validator/stac_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ def collections_summary(message: List[Dict[str, Any]]) -> None:
is_flag=True,
help="Disables the opening of href links when validating assets (enabled by default).",
)
@click.option(
"--header",
type=(str, str),
multiple=True,
help="HTTP header to include in the requests. Can be used multiple times.",
)
@click.option(
"--pages",
"-p",
Expand All @@ -134,6 +140,7 @@ def main(
collections: bool,
item_collection: bool,
no_assets_urls: bool,
header: list,
pages: int,
recursive: bool,
max_depth: int,
Expand All @@ -154,6 +161,7 @@ def main(
collections (bool): Validate response from /collections endpoint.
item_collection (bool): Whether to validate item collection responses.
no_assets_urls (bool): Whether to open href links when validating assets (enabled by default).
header (list): HTTP headers to include in the requests, given as (key, value) pairs.
pages (int): Maximum number of pages to validate via `item_collection`.
recursive (bool): Whether to recursively validate all related STAC objects.
max_depth (int): Maximum depth to traverse when recursing.
Expand Down Expand Up @@ -185,6 +193,7 @@ def main(
links=links,
assets=assets,
assets_open_urls=not no_assets_urls,
headers=dict(header),
extensions=extensions,
custom=custom,
verbose=verbose,
Expand Down
17 changes: 9 additions & 8 deletions stac_validator/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import ssl
from typing import Dict
from urllib.parse import urlparse
from urllib.request import urlopen
from urllib.request import Request, urlopen

import requests # type: ignore

Expand Down Expand Up @@ -77,7 +77,7 @@ def get_stac_type(stac_content: Dict) -> str:
return str(e)


def fetch_and_parse_file(input_path: str) -> Dict:
def fetch_and_parse_file(input_path: str, headers: Dict = {}) -> Dict:
"""Fetches and parses a JSON file from a URL or local file.

Given a URL or local file path to a JSON file, this function fetches the file,
Expand All @@ -87,6 +87,7 @@ def fetch_and_parse_file(input_path: str) -> Dict:

Args:
input_path: A string representing the URL or local file path to the JSON file.
headers: HTTP headers to include in the request; only used when input_path is a URL.

Returns:
A dictionary containing the parsed contents of the JSON file.
Expand All @@ -97,7 +98,7 @@ def fetch_and_parse_file(input_path: str) -> Dict:
"""
try:
if is_url(input_path):
resp = requests.get(input_path)
resp = requests.get(input_path, headers=headers)
resp.raise_for_status()
data = resp.json()
else:
Expand Down Expand Up @@ -150,9 +151,7 @@ def set_schema_addr(version: str, stac_type: str) -> str:


def link_request(
link: Dict,
initial_message: Dict,
open_urls: bool = True,
link: Dict, initial_message: Dict, open_urls: bool = True, headers: Dict = {}
) -> None:
"""Makes a request to a URL and appends it to the relevant field of the initial message.

Expand All @@ -161,6 +160,7 @@ def link_request(
initial_message: A dictionary containing lists for "request_valid", "request_invalid",
"format_valid", and "format_invalid" URLs.
open_urls: Whether to open link href URL
headers: HTTP headers to include in the request

Returns:
None
Expand All @@ -169,11 +169,12 @@ def link_request(
if is_url(link["href"]):
try:
if open_urls:
request = Request(link["href"], headers=headers)
if "s3" in link["href"]:
context = ssl._create_unverified_context()
response = urlopen(link["href"], context=context)
response = urlopen(request, context=context)
else:
response = urlopen(link["href"])
response = urlopen(request)
status_code = response.getcode()
if status_code == 200:
initial_message["request_valid"].append(link["href"])
Expand Down
21 changes: 14 additions & 7 deletions stac_validator/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class StacValidate:
links (bool): Whether to additionally validate links (only works in default mode).
assets (bool): Whether to additionally validate assets (only works in default mode).
assets_open_urls (bool): Whether to open assets URLs when validating assets.
headers (dict): HTTP headers to include in the requests.
extensions (bool): Whether to only validate STAC object extensions.
custom (str): The local filepath or remote URL of a custom JSON schema to validate the STAC object.
verbose (bool): Whether to enable verbose output in recursive mode.
Expand All @@ -56,6 +57,7 @@ def __init__(
links: bool = False,
assets: bool = False,
assets_open_urls: bool = True,
headers: dict = {},
extensions: bool = False,
custom: str = "",
verbose: bool = False,
Expand All @@ -70,6 +72,7 @@ def __init__(
self.links = links
self.assets = assets
self.assets_open_urls = assets_open_urls
self.headers: Dict = headers
self.recursive = recursive
self.max_depth = max_depth
self.extensions = extensions
Expand Down Expand Up @@ -125,7 +128,9 @@ def assets_validator(self) -> Dict:
assets = self.stac_content.get("assets")
if assets:
for asset in assets.values():
link_request(asset, initial_message, self.assets_open_urls)
link_request(
asset, initial_message, self.assets_open_urls, self.headers
)
return initial_message

def links_validator(self) -> Dict:
Expand All @@ -145,7 +150,7 @@ def links_validator(self) -> Dict:
for link in self.stac_content["links"]:
if not is_valid_url(link["href"]):
link["href"] = root_url + link["href"][1:]
link_request(link, initial_message)
link_request(link, initial_message, True, self.headers)

return initial_message

Expand Down Expand Up @@ -345,7 +350,9 @@ def recursive_validator(self, stac_type: str) -> bool:
self.stac_file = st + "/" + address
else:
self.stac_file = address
self.stac_content = fetch_and_parse_file(str(self.stac_file))
self.stac_content = fetch_and_parse_file(
str(self.stac_file), self.headers
)
self.stac_content["stac_version"] = self.version
stac_type = get_stac_type(self.stac_content).lower()

Expand Down Expand Up @@ -414,7 +421,7 @@ def validate_collections(self) -> None:
Returns:
None
"""
collections = fetch_and_parse_file(str(self.stac_file))
collections = fetch_and_parse_file(str(self.stac_file), self.headers)
for collection in collections["collections"]:
self.schema = ""
self.validate_dict(collection)
Expand All @@ -437,7 +444,7 @@ def validate_item_collection(self) -> None:
"""
page = 1
print(f"processing page {page}")
item_collection = fetch_and_parse_file(str(self.stac_file))
item_collection = fetch_and_parse_file(str(self.stac_file), self.headers)
self.validate_item_collection_dict(item_collection)
try:
if self.pages is not None:
Expand All @@ -450,7 +457,7 @@ def validate_item_collection(self) -> None:
next_link = link["href"]
self.stac_file = next_link
item_collection = fetch_and_parse_file(
str(self.stac_file)
str(self.stac_file), self.headers
)
self.validate_item_collection_dict(item_collection)
break
Expand Down Expand Up @@ -489,7 +496,7 @@ def run(self) -> bool:
and not self.item_collection
and not self.collections
):
self.stac_content = fetch_and_parse_file(self.stac_file)
self.stac_content = fetch_and_parse_file(self.stac_file, self.headers)

stac_type = get_stac_type(self.stac_content).upper()
self.version = self.stac_content["stac_version"]
Expand Down
50 changes: 50 additions & 0 deletions tests/test_header.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
Description: Test --header option

"""

import json

import requests_mock

from stac_validator import stac_validator


def test_header():
    """Verify that headers passed to StacValidate reach the HTTP request.

    The mocked endpoint returns the STAC item only when the ``x-api-key``
    header is sent; any other request to the same URL is answered with 403.
    The validator is run once with the header and once without it, and the
    resulting messages are compared against the expected outcomes.
    """
    stac_file = "tests/test_data/v110/simple-item.json"
    url = "https://localhost/" + stac_file

    no_headers = {}
    valid_headers = {"x-api-key": "a-valid-api-key"}

    expected_with_key = [
        {
            "version": "1.1.0",
            "path": "https://localhost/tests/test_data/v110/simple-item.json",
            "schema": [
                "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/item.json"
            ],
            "valid_stac": True,
            "asset_type": "ITEM",
            "validation_method": "core",
        }
    ]
    expected_without_key = [
        {
            "version": "",
            "path": "https://localhost/tests/test_data/v110/simple-item.json",
            "schema": [""],
            "valid_stac": False,
            "error_type": "HTTPError",
            "error_message": "403 Client Error: None for url: https://localhost/tests/test_data/v110/simple-item.json",
        }
    ]

    # real_http=True lets every URL that is not mocked here go to the network.
    with requests_mock.Mocker(real_http=True) as mocker:
        with open(stac_file) as item_file:
            item_json = json.load(item_file)

        # Same registration order as before: the catch-all 403 first, then
        # the header-specific success response.
        mocker.get(url, request_headers=no_headers, status_code=403)
        mocker.get(url, request_headers=valid_headers, json=item_json)

        authorized = stac_validator.StacValidate(
            url, core=True, headers=valid_headers
        )
        authorized.run()
        assert authorized.message == expected_with_key

        anonymous = stac_validator.StacValidate(url, core=True, headers=no_headers)
        anonymous.run()
        assert anonymous.message == expected_without_key
4 changes: 3 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,7 @@
envlist = py38,py39,py310,py311,py312,py313

[testenv]
deps = pytest
deps =
pytest
requests-mock
commands = pytest
2 changes: 1 addition & 1 deletion tox/Dockerfile-tox
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ COPY . /code/
RUN export LC_ALL=C.UTF-8 && \
export LANG=C.UTF-8 && \
pip3 install . && \
pip3 install tox==4.0.11 && \
pip3 install tox==4.23.2 && \
tox
Loading