Commit cdf05fb

Merge pull request #59 from enram/aloftdata
Should we use production bucket and inventory?
peterdesmet authored Aug 24, 2023
2 parents 52af3b7 + 8980885
Showing 11 changed files with 76 additions and 64 deletions.
2 changes: 1 addition & 1 deletion src/vptstools/bin/transfer_baltrad.py
@@ -16,7 +16,7 @@
AWS_SNS_TOPIC = os.environ.get("SNS_TOPIC")
AWS_PROFILE = os.environ.get("AWS_PROFILE", None)
AWS_REGION = os.environ.get("AWS_REGION", None)
DESTINATION_BUCKET = os.environ.get("DESTINATION_BUCKET", "aloft")
DESTINATION_BUCKET = os.environ.get("DESTINATION_BUCKET", "inbo-aloft-uat-eu-west-1-default")

# Update reporting to SNS functionality
report_sns = partial(report_click_exception_to_sns,
4 changes: 2 additions & 2 deletions src/vptstools/bin/vph5_to_vpts.py
@@ -19,8 +19,8 @@
# (load_dotenv doesn't override existing environment variables)
load_dotenv()

S3_BUCKET = os.environ.get("DESTINATION_BUCKET", "aloft")
INVENTORY_BUCKET = os.environ.get("INVENTORY_BUCKET", "aloft-inventory")
S3_BUCKET = os.environ.get("DESTINATION_BUCKET", "inbo-aloft-uat-eu-west-1-default")
INVENTORY_BUCKET = os.environ.get("INVENTORY_BUCKET", "inbo-aloft-uat-eu-west-1-inventory")
AWS_SNS_TOPIC = os.environ.get("SNS_TOPIC")
AWS_PROFILE = os.environ.get("AWS_PROFILE", None)
AWS_REGION = os.environ.get("AWS_REGION", "eu-west-1")
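Note on the defaults above: load_dotenv() does not override variables that are already set (see the comment in the file), so the hardcoded UAT bucket names only apply when neither the shell environment nor a .env file provides a value. A minimal sketch of that precedence, assuming python-dotenv and a hypothetical .env entry:

import os
from dotenv import load_dotenv

# Sketch only: assumes a .env file with DESTINATION_BUCKET=from-dotenv
os.environ["DESTINATION_BUCKET"] = "from-shell"
load_dotenv()  # keeps "from-shell"; existing variables are never overridden
print(os.environ.get("DESTINATION_BUCKET", "inbo-aloft-uat-eu-west-1-default"))
# -> from-shell (shell beats .env, and .env beats the hardcoded default)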
4 changes: 2 additions & 2 deletions src/vptstools/s3.py
@@ -127,8 +127,8 @@ def s3_path_setup(self, file_output):
"""Common setup of the S3 bucket logic"""
return f"{self.source}/{file_output}/{self.radar_code}/{self.year}"

def s3_url_h5(self, bucket="aloft"):
"""Full S3 URL for the stored HDF5 file"""
def s3_url_h5(self, bucket="aloftdata"):
"""Full S3 URL for the stored h5 file"""
return (
f"s3://{bucket}/{self.s3_path_setup('hdf5')}/"
f"{self.month}/{self.day}/{self.file_name}"
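The two f-strings above compose the full object URL from the path parts set up in s3_path_setup. A standalone sketch with hypothetical values (the real ones come from an OdimFilePath instance):

# Hypothetical values, mirroring s3_path_setup() and s3_url_h5() above
bucket, source, radar_code = "aloftdata", "baltrad", "bejab"
year, month, day = "2022", "11", "11"
file_name = "bejab_vp_20221111T233000Z_0x9.h5"

prefix = f"{source}/hdf5/{radar_code}/{year}"
print(f"s3://{bucket}/{prefix}/{month}/{day}/{file_name}")
# s3://aloftdata/baltrad/hdf5/bejab/2022/11/11/bejab_vp_20221111T233000Z_0x9.h5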
2 changes: 1 addition & 1 deletion src/vptstools/vpts.py
@@ -198,7 +198,7 @@ def vp(file_path, vpts_csv_version="v1.0", source_file=""):
>>> file_path = Path("bejab_vp_20221111T233000Z_0x9.h5")
>>> vp(file_path)
>>> vp(file_path,
... source_file="s3://aloft/baltrad/hdf5/2022/11/11/bejab_vp_20221111T233000Z_0x9.h5") #noqa
... source_file="s3://aloftdata/baltrad/hdf5/2022/11/11/bejab_vp_20221111T233000Z_0x9.h5") #noqa
Use file name itself as source_file representation in VP file using a custom
callable function
4 changes: 2 additions & 2 deletions src/vptstools/vpts_csv.py
@@ -114,10 +114,10 @@ def check_source_file(source_file, regex):
Examples
--------
>>> check_source_file("s3://alof/baltrad/2023/01/01/"
>>> check_source_file("s3://aloftdata/baltrad/2023/01/01/"
... "bejab_vp_20230101T000500Z_0x9.h5",
... r".*h5")
's3://alof/baltrad/2023/01/01/bejab_vp_20230101T000500Z_0x9.h5'
's3://aloftdata/baltrad/2023/01/01/bejab_vp_20230101T000500Z_0x9.h5'
"""
sf_regex = re.compile(regex)
if re.match(sf_regex, source_file):
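The excerpt shows only the start of the validation; a minimal sketch of the same idea (the failure branch is not visible in the diff, so the ValueError is an assumption, not the library's actual exception):

import re

def check_source_file_sketch(source_file, regex):
    # Return source_file when it matches; the real function may raise
    # a different exception on mismatch.
    if re.match(re.compile(regex), source_file):
        return source_file
    raise ValueError(f"{source_file} does not match {regex}")

check_source_file_sketch(
    "s3://aloftdata/baltrad/2023/01/01/bejab_vp_20230101T000500Z_0x9.h5", r".*h5"
)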
23 changes: 15 additions & 8 deletions tests/conftest.py
@@ -1,5 +1,6 @@
import os
import datetime
import os
from pathlib import Path
from typing import Callable, Any
from unittest.mock import MagicMock, patch
@@ -125,6 +126,12 @@ def patched_convert_to_response_dict(

# ----------------------------------------------------------------------------------------------------------------------

def pytest_generate_tests(metafunc):
"""Set bucket and inventory to dummy versions as environmental variables during testing"""
os.environ['DESTINATION_BUCKET'] = "dummy-aloftdata"
os.environ['INVENTORY_BUCKET'] = "dummy-inventory"

# ----------------------------------------------------------------------------------------------------------------------

@pytest.fixture
def path_with_vp():
@@ -269,31 +276,31 @@ def s3_inventory(aws_credentials, path_inventory):
s3 = boto3.client("s3")
# Add S3 inventory setup
s3.create_bucket(
Bucket="aloft-inventory",
Bucket="dummy-inventory",
CreateBucketConfiguration={"LocationConstraint": "eu-west-1"},
)
with open(manifest, "rb") as manifest_file:
s3.upload_fileobj(
manifest_file,
"aloft-inventory",
"aloft/aloft-hdf5-files-inventory/2023-02-01T01-00Z/manifest.json",
"dummy-inventory",
"dummy-aloftdata/dummy-aloftdata-hdf5-files-inventory/2023-02-01T01-00Z/manifest.json",
)
with open(inventory, "rb") as inventory_file:
s3.upload_fileobj(
inventory_file,
"aloft-inventory",
"aloft/aloft-hdf5-files-inventory/data/dummy_inventory.csv.gz",
"dummy-inventory",
"dummy-aloftdata/dummy-aloftdata-hdf5-files-inventory/data/dummy_inventory.csv.gz",
)

# Add example data to aloft mocked S3 bucket
# Add example data to aloftdata mocked S3 bucket
s3.create_bucket(
Bucket="aloft",
Bucket="dummy-aloftdata",
CreateBucketConfiguration={"LocationConstraint": "eu-west-1"},
)
for h5file in (path_inventory / "vp").glob("*.h5"):
with open(h5file, "rb") as h5f:
s3.upload_fileobj(
h5f, "aloft", f"baltrad/hdf5/nosta/2023/03/11/{h5file.name}"
h5f, "dummy-aloftdata", f"baltrad/hdf5/nosta/2023/03/11/{h5file.name}"
)
yield s3

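The pytest_generate_tests hook added above exports the dummy bucket names for every collected test. A monkeypatch-based autouse fixture would be an alternative sketch (not the project's code) that also restores the environment after each test, assuming the bucket names are read at call time rather than at module import:

import pytest

@pytest.fixture(autouse=True)
def dummy_buckets(monkeypatch):
    # Hypothetical alternative; monkeypatch undoes this after each test
    monkeypatch.setenv("DESTINATION_BUCKET", "dummy-aloftdata")
    monkeypatch.setenv("INVENTORY_BUCKET", "dummy-inventory")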
Binary file modified tests/data/inventory/dummy_inventory.csv.gz
6 changes: 3 additions & 3 deletions tests/data/inventory/dummy_manifest.json
@@ -1,12 +1,12 @@
{
"sourceBucket" : "aloft",
"destinationBucket" : "arn:aws:s3:::aloft-inventory",
"sourceBucket" : "dummy-aloftdata",
"destinationBucket" : "arn:aws:s3:::dummy-inventory",
"version" : "2016-11-30",
"creationTimestamp" : "1678582800000",
"fileFormat" : "CSV",
"fileSchema" : "Bucket, Key, Size, LastModifiedDate",
"files" : [ {
"key" : "aloft/aloft-hdf5-files-inventory/data/dummy_inventory.csv.gz",
"key" : "dummy-aloftdata/dummy-aloftdata-hdf5-files-inventory/data/dummy_inventory.csv.gz",
"size" : 1,
"MD5checksum" : "DUMMY"
}]
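The manifest's files array is what list_manifest_file_keys (tested below) iterates to locate the gzipped inventory chunks. A minimal local sketch of that lookup using only the JSON above:

import json

with open("tests/data/inventory/dummy_manifest.json") as f:
    manifest = json.load(f)

for entry in manifest["files"]:
    print(entry["key"], entry["size"])
# dummy-aloftdata/dummy-aloftdata-hdf5-files-inventory/data/dummy_inventory.csv.gz 1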
24 changes: 12 additions & 12 deletions tests/test_s3.py
@@ -318,7 +318,7 @@ def test_parse_file_from_inventory(self, file_path, components):
odim_path.s3_path_setup(file_type)
== f"{source}/{file_type}/{radar_code}/{year}"
)
bucket = "aloft"
bucket = "dummy-aloftdata"
assert (
odim_path.s3_url_h5(bucket)
== f"s3://{bucket}/{source}/{file_type}/{radar_code}/"
@@ -428,12 +428,12 @@ def test_list_manifest_file_keys(self, s3_inventory):
"""Individual inventory items are correctly parsed from manifest file"""
inventory_files = list(
list_manifest_file_keys(
"aloft-inventory/aloft/aloft-hdf5-files-inventory/2023-02-01T01-00Z/manifest.json"
"dummy-inventory/dummy-aloftdata/dummy-aloftdata-hdf5-files-inventory/2023-02-01T01-00Z/manifest.json"
)
)
assert len(inventory_files) == 1
assert (
inventory_files[0]["key"] == "aloft/aloft-hdf5-files-inventory/data/"
inventory_files[0]["key"] == "dummy-aloftdata/dummy-aloftdata-hdf5-files-inventory/data/"
"dummy_inventory.csv.gz"
)

@@ -452,13 +452,13 @@ def test_list_manifest_file_keys_with_profile(self, s3_inventory, tmp_path):
# run inventory with alternative profile
inventory_files = list(
list_manifest_file_keys(
"aloft-inventory/aloft/aloft-hdf5-files-inventory/2023-02-01T01-00Z/manifest.json",
"dummy-inventory/dummy-aloftdata/dummy-aloftdata-hdf5-files-inventory/2023-02-01T01-00Z/manifest.json",
storage_options={"profile": "my-aws-profile"},
)
)
assert len(inventory_files) == 1
assert (
inventory_files[0]["key"] == "aloft/aloft-hdf5-files-inventory/data/"
inventory_files[0]["key"] == "dummy-aloftdata/dummy-aloftdata-hdf5-files-inventory/data/"
"dummy_inventory.csv.gz"
)
# clean up env variable
@@ -472,7 +472,7 @@ def test_handle_manifest_all(self, s3_inventory):
return_value=pd.Timestamp("2023-02-01 00:00:00", tz="UTC"),
):
df_cov, days_to_create_vpts = handle_manifest(
"s3://aloft-inventory/aloft/aloft-hdf5-files-inventory/2023-02-01T01-00Z/manifest.json",
"s3://dummy-inventory/dummy-aloftdata/dummy-aloftdata-hdf5-files-inventory/2023-02-01T01-00Z/manifest.json",
modified_days_ago="60days",
) # large enough number to get all inventory 'modified' items
# When date-modified implies full scan, df_cov and days_to_create_vpts are the same
@@ -495,7 +495,7 @@ def test_handle_manifest_subset(self, s3_inventory):
return_value=pd.Timestamp("2023-02-01 00:00:00", tz="UTC"),
):
df_cov, days_to_create_vpts = handle_manifest(
"s3://aloft-inventory/aloft/aloft-hdf5-files-inventory/2023-02-01T01-00Z/manifest.json",
"s3://dummy-inventory/dummy-aloftdata/dummy-aloftdata-hdf5-files-inventory/2023-02-01T01-00Z/manifest.json",
modified_days_ago="5days",
) # only subset of files is within the time window of days
# Coverage returns the full inventory overview
@@ -517,7 +517,7 @@ def test_handle_manifest_none(self, s3_inventory):
return_value=pd.Timestamp("2023-03-01 00:00:00", tz="UTC"),
):
df_cov, days_to_create_vpts = handle_manifest(
"s3://aloft-inventory/aloft/aloft-hdf5-files-inventory/2023-02-01T01-00Z/manifest.json",
"s3://dummy-inventory/dummy-aloftdata/dummy-aloftdata-hdf5-files-inventory/2023-02-01T01-00Z/manifest.json",
modified_days_ago="1days",
) # only subset of files is within the time window of days
# Coverage returns the full inventory overview
@@ -535,25 +535,25 @@ def test_handle_inventory_alternative_suffix(self):
df_inventory = pd.DataFrame(
[
{
"repo": "aloft",
"repo": "dummy-aloftdata",
"file": "baltrad/coverage.csv",
"size": 1,
"modified": pd.Timestamp("2023-01-31 00:00:00+0000", tz="UTC"),
},
{
"repo": "aloft",
"repo": "dummy-aloftdata",
"file": "baltrad/inventory.csv.gz",
"size": 1,
"modified": pd.Timestamp("2023-01-31 00:00:00+0000", tz="UTC"),
},
{
"repo": "aloft",
"repo": "dummy-aloftdata",
"file": "baltrad/manifest.json",
"size": 1,
"modified": pd.Timestamp("2023-01-31 00:00:00+0000", tz="UTC"),
},
{
"repo": "aloft",
"repo": "dummy-aloftdata",
"file": "baltrad/14azd6.checksum",
"size": 1,
"modified": pd.Timestamp("2023-01-31 00:00:00+0000", tz="UTC"),
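The handle_manifest tests above patch pandas.Timestamp.now and pass windows such as "60days", "5days" and "1days". The assumed cutoff arithmetic behind that filter (a sketch, not the library code):

import pandas as pd

now = pd.Timestamp("2023-02-01 00:00:00", tz="UTC")  # value patched in the tests
cutoff = now - pd.Timedelta("60days")
print(cutoff)  # 2022-12-03 00:00:00+00:00
# inventory rows whose "modified" timestamp falls after the cutoff are the
# days whose VPTS files get recreated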
65 changes: 35 additions & 30 deletions tests/test_vph5_to_vpts.py
@@ -27,40 +27,45 @@ def test_e2e_cli(s3_inventory, path_inventory, tmp_path):
"pandas.Timestamp.now",
return_value=pd.Timestamp("2023-02-02 00:00:00", tz="UTC"),
):

# Run CLI command `vph5_to_vpts` with limited modified period check to 3 days
runner = CliRunner()
result = runner.invoke(cli, ["--modified-days-ago", str(3)])

# Check individual steps of the CLI command
assert "Create 1 daily VPTS files" in result.output
assert "Create 1 monthly VPTS files" in result.output
assert "Finished VPTS update procedure" in result.output
assert result.exception is None

# Compare resulting coverage file with reference coverage ---------------------
with open(tmp_path / "coverage.csv", "wb") as f:
s3_inventory.download_fileobj("aloft", "coverage.csv", f)
filecmp.cmp(path_inventory / "coverage.csv", tmp_path / "coverage.csv")

# Compare resulting daily file
with open(tmp_path / "nosta_vpts_20230311.csv", "wb") as f:
s3_inventory.download_fileobj(
"aloft", "baltrad/daily/nosta/2023/nosta_vpts_20230311.csv", f
with runner.isolated_filesystem():

result = runner.invoke(cli,
["--modified-days-ago", str(3)])

# Check individual steps of the CLI command
print(result.output)
assert "Create 1 daily VPTS files" in result.output
assert "Create 1 monthly VPTS files" in result.output
assert "Finished VPTS update procedure" in result.output
assert result.exception is None

# Compare resulting coverage file with reference coverage ---------------------
with open(tmp_path / "coverage.csv", "wb") as f:
s3_inventory.download_fileobj("dummy-aloftdata", "coverage.csv", f)
filecmp.cmp(path_inventory / "coverage.csv", tmp_path / "coverage.csv")

# Compare resulting daily file
with open(tmp_path / "nosta_vpts_20230311.csv", "wb") as f:
s3_inventory.download_fileobj(
"dummy-aloftdata", "baltrad/daily/nosta/2023/nosta_vpts_20230311.csv", f
)
filecmp.cmp(
path_inventory / "nosta_vpts_20230311.csv",
tmp_path / "nosta_vpts_20230311.csv",
)
filecmp.cmp(
path_inventory / "nosta_vpts_20230311.csv",
tmp_path / "nosta_vpts_20230311.csv",
)

# Compare resulting monthly file
with open(tmp_path / "nosta_vpts_202303.csv.gz", "wb") as f:
s3_inventory.download_fileobj(
"aloft", "baltrad/monthly/nosta/2023/nosta_vpts_202303.csv.gz", f

# Compare resulting monthly file
with open(tmp_path / "nosta_vpts_202303.csv.gz", "wb") as f:
s3_inventory.download_fileobj(
"dummy-aloftdata", "baltrad/monthly/nosta/2023/nosta_vpts_202303.csv.gz", f
)
filecmp.cmp(
path_inventory / "nosta_vpts_202303.csv.gz",
tmp_path / "nosta_vpts_202303.csv.gz",
)
filecmp.cmp(
path_inventory / "nosta_vpts_202303.csv.gz",
tmp_path / "nosta_vpts_202303.csv.gz",
)


def test_e2e_cli_all(s3_inventory, path_inventory, tmp_path, sns):
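The rewritten test wraps the invocation in runner.isolated_filesystem(), which runs the CLI in a throwaway working directory. A minimal, self-contained sketch of that click testing pattern (the toy cli command below is hypothetical, standing in for the vph5_to_vpts entry point):

import click
from click.testing import CliRunner

@click.command()
@click.option("--modified-days-ago", default="1")
def cli(modified_days_ago):
    click.echo("Finished VPTS update procedure")

runner = CliRunner()
with runner.isolated_filesystem():  # cwd is a temp dir, removed on exit
    result = runner.invoke(cli, ["--modified-days-ago", "3"])
assert result.exception is None
assert "Finished VPTS update procedure" in result.output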
6 changes: 3 additions & 3 deletions tests/test_vpts.py
@@ -42,7 +42,7 @@ def _convert_to_source_dummy(file_path):

def _convert_to_source_s3(file_path):
"""Return the file name itself from a file path"""
return OdimFilePath.from_file_name(file_path, source="baltrad").s3_url_h5("aloft")
return OdimFilePath.from_file_name(file_path, source="baltrad").s3_url_h5("dummy-aloftdata")


@pytest.mark.parametrize("vpts_version", ["v1.0"])
@@ -204,7 +204,7 @@ def test_vp_custom_callable_file(self, vpts_version, path_with_vp):

# Use a conversion to S3 function
df_vp = vp(file_path, vpts_version, _convert_to_source_s3)
assert df_vp["source_file"].str.startswith("s3://aloft/baltrad").all()
assert df_vp["source_file"].str.startswith("s3://dummy-aloftdata/baltrad").all()

def test_vpts_no_source_file(self, vpts_version, path_with_vp):
"""The file name itself is used when no source_file reference is provided"""
@@ -233,7 +233,7 @@ def test_vpts_custom_callable_file(self, vpts_version, path_with_vp):

# Use a conversion to S3 function
df_vpts = vpts(file_paths, vpts_version, _convert_to_source_s3)
assert df_vpts["source_file"].str.startswith("s3://aloft/baltrad").all()
assert df_vpts["source_file"].str.startswith("s3://dummy-aloftdata/baltrad").all()

def test_vp_invalid_file(self, vpts_version, path_with_wrong_h5): # noqa
"""Invalid HDF5 VP file raises InvalidSourceODIM exceptin"""
